iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS
[linux-2.6-block.git] / drivers / iommu / intel-iommu.c
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 18 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
19 */
20
9f10e5bf
JR
21#define pr_fmt(fmt) "DMAR: " fmt
22
ba395927
KA
23#include <linux/init.h>
24#include <linux/bitmap.h>
5e0d2a6f 25#include <linux/debugfs.h>
54485c30 26#include <linux/export.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
75f05569 35#include <linux/memory.h>
5e0d2a6f 36#include <linux/timer.h>
38717946 37#include <linux/iova.h>
5d450806 38#include <linux/iommu.h>
38717946 39#include <linux/intel-iommu.h>
134fac3f 40#include <linux/syscore_ops.h>
69575d38 41#include <linux/tboot.h>
adb2fe02 42#include <linux/dmi.h>
5cdede24 43#include <linux/pci-ats.h>
0ee332c1 44#include <linux/memblock.h>
36746436 45#include <linux/dma-contiguous.h>
091d42e4 46#include <linux/crash_dump.h>
8a8f422d 47#include <asm/irq_remapping.h>
ba395927 48#include <asm/cacheflush.h>
46a7fa27 49#include <asm/iommu.h>
ba395927 50
078e1ee2
JR
51#include "irq_remapping.h"
52
5b6985ce
FY
53#define ROOT_SIZE VTD_PAGE_SIZE
54#define CONTEXT_SIZE VTD_PAGE_SIZE
55
ba395927 56#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 57#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 58#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 59#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
60
61#define IOAPIC_RANGE_START (0xfee00000)
62#define IOAPIC_RANGE_END (0xfeefffff)
63#define IOVA_START_ADDR (0x1000)
64
65#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
66
4ed0d3e6 67#define MAX_AGAW_WIDTH 64
5c645b35 68#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 69
2ebe3151
DW
70#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72
73/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
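/*
 * Worked example (illustrative, assuming VTD_PAGE_SHIFT == 12): for a
 * 48-bit guest address width, __DOMAIN_MAX_PFN(48) = (1ULL << 36) - 1 and
 * DOMAIN_MAX_ADDR(48) = ((1ULL << 36) - 1) << 12 = 0xfffffffff000.  On a
 * 64-bit kernel DOMAIN_MAX_PFN(48) keeps that value; on 32-bit it is
 * clamped to (unsigned long)-1 so PFNs still fit in an unsigned long.
 */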
ba395927 78
1b722500
RM
79/* IO virtual address start page frame number */
80#define IOVA_START_PFN (1)
81
f27be03b 82#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 83#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 84#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 85
df08cdc7
AM
86/* page table handling */
87#define LEVEL_STRIDE (9)
88#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
89
6d1c56a9
OBC
90/*
91 * This bitmap is used to advertise the page sizes our hardware supports
92 * to the IOMMU core, which will then use this information to split
93 * physically contiguous memory regions it is mapping into page sizes
94 * that we support.
95 *
96 * Traditionally the IOMMU core just handed us the mappings directly,
97 * after making sure the size is an order of a 4KiB page and that the
98 * mapping has natural alignment.
99 *
100 * To retain this behavior, we currently advertise that we support
101 * all page sizes that are an order of 4KiB.
102 *
103 * If at some point we'd like to utilize the IOMMU core's new behavior,
104 * we could change this to advertise the real page sizes we support.
105 */
106#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
107
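/*
 * Illustrative user-space sketch, not part of this driver: with the bitmap
 * above (~0xFFFUL) every power-of-two size >= 4KiB is advertised, and the
 * IOMMU core splits a region into the largest advertised pages that the
 * alignment and remaining length allow (roughly as below).  The addresses,
 * sizes and the assumption of a 64-bit unsigned long are example-only.
 */
#include <stdio.h>

int main(void)
{
	unsigned long pgsizes = ~0xFFFUL;		/* 4KiB and every larger order */
	unsigned long iova = 0x200000;			/* 2MiB-aligned start */
	unsigned long size = 0x201000;			/* 2MiB + 4KiB to map */

	while (size) {
		unsigned long align = iova & -iova;	/* largest natural alignment */
		unsigned long max = align < size ? align : size;
		/* highest advertised page size not larger than 'max' */
		unsigned long pgsize = 1UL << (63 - __builtin_clzl(pgsizes & (max | (max - 1))));

		printf("map 0x%lx bytes at 0x%lx\n", pgsize, iova);
		iova += pgsize;
		size -= pgsize;
	}
	return 0;
}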
df08cdc7
AM
108static inline int agaw_to_level(int agaw)
109{
110 return agaw + 2;
111}
112
113static inline int agaw_to_width(int agaw)
114{
5c645b35 115 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
116}
117
118static inline int width_to_agaw(int width)
119{
5c645b35 120 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
121}
122
123static inline unsigned int level_to_offset_bits(int level)
124{
125 return (level - 1) * LEVEL_STRIDE;
126}
127
128static inline int pfn_level_offset(unsigned long pfn, int level)
129{
130 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
131}
132
133static inline unsigned long level_mask(int level)
134{
135 return -1UL << level_to_offset_bits(level);
136}
137
138static inline unsigned long level_size(int level)
139{
140 return 1UL << level_to_offset_bits(level);
141}
142
143static inline unsigned long align_to_level(unsigned long pfn, int level)
144{
145 return (pfn + level_size(level) - 1) & level_mask(level);
146}
fd18de50 147
6dd9a7c7
YS
148static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
149{
5c645b35 150 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
151}
152
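/*
 * Illustrative user-space walk of the index math above, assuming a 4-level
 * table (agaw == 2, 48-bit width).  Not part of this driver; the EX_*
 * macros and the pfn are example values only.
 */
#include <stdio.h>

#define EX_LEVEL_STRIDE	9
#define EX_LEVEL_MASK	((1ULL << EX_LEVEL_STRIDE) - 1)

int main(void)
{
	unsigned long long pfn = 0x12345678ULL;	/* 4KiB page frame number */
	int level;

	/* same arithmetic as level_to_offset_bits() + pfn_level_offset() */
	for (level = 4; level >= 1; level--)
		printf("level %d index: %llu\n", level,
		       (pfn >> ((level - 1) * EX_LEVEL_STRIDE)) & EX_LEVEL_MASK);
	return 0;
}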
dd4e8319
DW
153/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
154 are never going to work. */
155static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
156{
157 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
158}
159
160static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
161{
162 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
163}
164static inline unsigned long page_to_dma_pfn(struct page *pg)
165{
166 return mm_to_dma_pfn(page_to_pfn(pg));
167}
168static inline unsigned long virt_to_dma_pfn(void *p)
169{
170 return page_to_dma_pfn(virt_to_page(p));
171}
172
d9630fe9
WH
173/* global iommu list, set NULL for ignored DMAR units */
174static struct intel_iommu **g_iommus;
175
e0fc7e0b 176static void __init check_tylersburg_isoch(void);
9af88143
DW
177static int rwbf_quirk;
178
b779260b
JC
179/*
180 * set to 1 to panic kernel if can't successfully enable VT-d
181 * (used when kernel is launched w/ TXT)
182 */
183static int force_on = 0;
184
46b08e1a
MM
185/*
186 * 0: Present
187 * 1-11: Reserved
188 * 12-63: Context Ptr (12 - (haw-1))
189 * 64-127: Reserved
190 */
191struct root_entry {
03ecc32c
DW
192 u64 lo;
193 u64 hi;
46b08e1a
MM
194};
195#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 196
091d42e4
JR
197/*
198 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
199 * if marked present.
200 */
201static phys_addr_t root_entry_lctp(struct root_entry *re)
202{
203 if (!(re->lo & 1))
204 return 0;
205
206 return re->lo & VTD_PAGE_MASK;
207}
208
209/*
210 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
211 * if marked present.
212 */
213static phys_addr_t root_entry_uctp(struct root_entry *re)
214{
215 if (!(re->hi & 1))
216 return 0;
46b08e1a 217
091d42e4
JR
218 return re->hi & VTD_PAGE_MASK;
219}
7a8fc25e
MM
220/*
221 * low 64 bits:
222 * 0: present
223 * 1: fault processing disable
224 * 2-3: translation type
225 * 12-63: address space root
226 * high 64 bits:
227 * 0-2: address width
228 * 3-6: avail
229 * 8-23: domain id
230 */
231struct context_entry {
232 u64 lo;
233 u64 hi;
234};
c07e7d21 235
cf484d0e
JR
236static inline void context_clear_pasid_enable(struct context_entry *context)
237{
238 context->lo &= ~(1ULL << 11);
239}
240
241static inline bool context_pasid_enabled(struct context_entry *context)
242{
243 return !!(context->lo & (1ULL << 11));
244}
245
246static inline void context_set_copied(struct context_entry *context)
247{
248 context->hi |= (1ull << 3);
249}
250
251static inline bool context_copied(struct context_entry *context)
252{
253 return !!(context->hi & (1ULL << 3));
254}
255
256static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
257{
258 return (context->lo & 1);
259}
cf484d0e
JR
260
261static inline bool context_present(struct context_entry *context)
262{
263 return context_pasid_enabled(context) ?
264 __context_present(context) :
265 __context_present(context) && !context_copied(context);
266}
267
c07e7d21
MM
268static inline void context_set_present(struct context_entry *context)
269{
270 context->lo |= 1;
271}
272
273static inline void context_set_fault_enable(struct context_entry *context)
274{
275 context->lo &= (((u64)-1) << 2) | 1;
276}
277
c07e7d21
MM
278static inline void context_set_translation_type(struct context_entry *context,
279 unsigned long value)
280{
281 context->lo &= (((u64)-1) << 4) | 3;
282 context->lo |= (value & 3) << 2;
283}
284
285static inline void context_set_address_root(struct context_entry *context,
286 unsigned long value)
287{
1a2262f9 288 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
289 context->lo |= value & VTD_PAGE_MASK;
290}
291
292static inline void context_set_address_width(struct context_entry *context,
293 unsigned long value)
294{
295 context->hi |= value & 7;
296}
297
298static inline void context_set_domain_id(struct context_entry *context,
299 unsigned long value)
300{
301 context->hi |= (value & ((1 << 16) - 1)) << 8;
302}
303
dbcd861f
JR
304static inline int context_domain_id(struct context_entry *c)
305{
306 return((c->hi >> 8) & 0xffff);
307}
308
c07e7d21
MM
309static inline void context_clear_entry(struct context_entry *context)
310{
311 context->lo = 0;
312 context->hi = 0;
313}
7a8fc25e 314
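/*
 * Illustrative user-space sketch of how the setters above pack a context
 * entry.  VTD_PAGE_MASK is taken as ~0xFFFULL here, and the page-table
 * root, translation type, address width and domain id are made-up values.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long lo = 0, hi = 0;

	lo |= 0x12340000ULL & ~0xFFFULL;	/* context_set_address_root()	*/
	lo |= (0ULL & 3) << 2;			/* translation type 0		*/
	lo |= 1;				/* context_set_present()	*/
	hi |= 2 & 7;				/* address width 2 -> 4-level	*/
	hi |= (42ULL & 0xFFFF) << 8;		/* context_set_domain_id(42)	*/

	printf("lo=%#llx hi=%#llx\n", lo, hi);	/* lo=0x12340001 hi=0x2a02	*/
	return 0;
}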
622ba12a
MM
315/*
316 * 0: readable
317 * 1: writable
318 * 2-6: reserved
319 * 7: super page
9cf06697
SY
320 * 8-10: available
321 * 11: snoop behavior
622ba12a
MM
322 * 12-63: Host physical address
323 */
324struct dma_pte {
325 u64 val;
326};
622ba12a 327
19c239ce
MM
328static inline void dma_clear_pte(struct dma_pte *pte)
329{
330 pte->val = 0;
331}
332
19c239ce
MM
333static inline u64 dma_pte_addr(struct dma_pte *pte)
334{
c85994e4
DW
335#ifdef CONFIG_64BIT
336 return pte->val & VTD_PAGE_MASK;
337#else
338 /* Must have a full atomic 64-bit read */
1a8bd481 339 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 340#endif
19c239ce
MM
341}
342
19c239ce
MM
343static inline bool dma_pte_present(struct dma_pte *pte)
344{
345 return (pte->val & 3) != 0;
346}
622ba12a 347
4399c8bf
AK
348static inline bool dma_pte_superpage(struct dma_pte *pte)
349{
c3c75eb7 350 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
351}
352
75e6bf96
DW
353static inline int first_pte_in_page(struct dma_pte *pte)
354{
355 return !((unsigned long)pte & ~VTD_PAGE_MASK);
356}
357
2c2e2c38
FY
358/*
359 * This domain is a static identity mapping domain.
360 * 1. This domain creates a static 1:1 mapping to all usable memory.
361 * 2. It maps to each iommu if successful.
362 * 3. Each iommu maps to this domain if successful.
363 */
19943b0e
DW
364static struct dmar_domain *si_domain;
365static int hw_pass_through = 1;
2c2e2c38 366
28ccce0d
JR
367/*
368 * Domain represents a virtual machine; more than one device
1ce28feb
WH
369 * across iommus may be owned by one domain, e.g. a kvm guest.
370 */
ab8dfe25 371#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 372
2c2e2c38 373/* si_domain contains multiple devices */
ab8dfe25 374#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 375
29a27719
JR
376#define for_each_domain_iommu(idx, domain) \
377 for (idx = 0; idx < g_num_of_iommus; idx++) \
378 if (domain->iommu_refcnt[idx])
379
99126f7c 380struct dmar_domain {
4c923d47 381 int nid; /* node id */
29a27719
JR
382
383 unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
384 /* Refcount of devices per iommu */
385
99126f7c 386
c0e8a6c8
JR
387 u16 iommu_did[DMAR_UNITS_SUPPORTED];
388 /* Domain ids per IOMMU. Use u16 since
389 * domain ids are 16 bit wide according
390 * to VT-d spec, section 9.3 */
99126f7c 391
00a77deb 392 struct list_head devices; /* all devices' list */
99126f7c
MM
393 struct iova_domain iovad; /* iova's that belong to this domain */
394
395 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
396 int gaw; /* max guest address width */
397
398 /* adjusted guest address width, 0 is level 2 30-bit */
399 int agaw;
400
3b5410e7 401 int flags; /* flags to find out type of domain */
8e604097
WH
402
403 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 404 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 405 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
406 int iommu_superpage;/* Level of superpages supported:
407 0 == 4KiB (no superpages), 1 == 2MiB,
408 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
fe40f1e0 409 u64 max_addr; /* maximum mapped address */
00a77deb
JR
410
411 struct iommu_domain domain; /* generic domain data structure for
412 iommu core */
99126f7c
MM
413};
414
a647dacb
MM
415/* PCI domain-device relationship */
416struct device_domain_info {
417 struct list_head link; /* link to domain siblings */
418 struct list_head global; /* link to global list */
276dbf99 419 u8 bus; /* PCI bus number */
a647dacb 420 u8 devfn; /* PCI devfn number */
b16d0cb9
DW
421 u8 pasid_supported:3;
422 u8 pasid_enabled:1;
423 u8 pri_supported:1;
424 u8 pri_enabled:1;
425 u8 ats_supported:1;
426 u8 ats_enabled:1;
427 u8 ats_qdep;
0bcb3e28 428 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 429 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
430 struct dmar_domain *domain; /* pointer to domain */
431};
432
b94e4117
JL
433struct dmar_rmrr_unit {
434 struct list_head list; /* list of rmrr units */
435 struct acpi_dmar_header *hdr; /* ACPI header */
436 u64 base_address; /* reserved base address*/
437 u64 end_address; /* reserved end address */
832bd858 438 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
439 int devices_cnt; /* target device count */
440};
441
442struct dmar_atsr_unit {
443 struct list_head list; /* list of ATSR units */
444 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 445 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
446 int devices_cnt; /* target device count */
447 u8 include_all:1; /* include all ports */
448};
449
450static LIST_HEAD(dmar_atsr_units);
451static LIST_HEAD(dmar_rmrr_units);
452
453#define for_each_rmrr_units(rmrr) \
454 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
455
5e0d2a6f 456static void flush_unmaps_timeout(unsigned long data);
457
b707cb02 458static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 459
80b20dd8 460#define HIGH_WATER_MARK 250
461struct deferred_flush_tables {
462 int next;
463 struct iova *iova[HIGH_WATER_MARK];
464 struct dmar_domain *domain[HIGH_WATER_MARK];
ea8ea460 465 struct page *freelist[HIGH_WATER_MARK];
80b20dd8 466};
467
468static struct deferred_flush_tables *deferred_flush;
469
5e0d2a6f 470/* number of registered intel_iommus, used to size per-iommu arrays */
5e0d2a6f 471static int g_num_of_iommus;
472
473static DEFINE_SPINLOCK(async_umap_flush_lock);
474static LIST_HEAD(unmaps_to_do);
475
476static int timer_on;
477static long list_size;
5e0d2a6f 478
92d03cc8 479static void domain_exit(struct dmar_domain *domain);
ba395927 480static void domain_remove_dev_info(struct dmar_domain *domain);
e6de0f8d
JR
481static void dmar_remove_one_dev_info(struct dmar_domain *domain,
482 struct device *dev);
127c7615 483static void __dmar_remove_one_dev_info(struct device_domain_info *info);
2452d9db
JR
484static void domain_context_clear(struct intel_iommu *iommu,
485 struct device *dev);
2a46ddf7
JL
486static int domain_detach_iommu(struct dmar_domain *domain,
487 struct intel_iommu *iommu);
ba395927 488
d3f13810 489#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
490int dmar_disabled = 0;
491#else
492int dmar_disabled = 1;
d3f13810 493#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 494
8bc1f85c
ED
495int intel_iommu_enabled = 0;
496EXPORT_SYMBOL_GPL(intel_iommu_enabled);
497
2d9e667e 498static int dmar_map_gfx = 1;
7d3b03ce 499static int dmar_forcedac;
5e0d2a6f 500static int intel_iommu_strict;
6dd9a7c7 501static int intel_iommu_superpage = 1;
c83b2f20 502static int intel_iommu_ecs = 1;
ae853ddb
DW
503static int intel_iommu_pasid28;
504static int iommu_identity_mapping;
505
506#define IDENTMAP_ALL 1
507#define IDENTMAP_GFX 2
508#define IDENTMAP_AZALIA 4
c83b2f20
DW
509
510/* We only actually use ECS when PASID support (on the new bit 40)
511 * is also advertised. Some early implementations — the ones with
512 * PASID support on bit 28 — have issues even when we *only* use
513 * extended root/context tables. */
ae853ddb
DW
514#define pasid_enabled(iommu) (ecap_pasid(iommu->ecap) || \
515 (intel_iommu_pasid28 && ecap_broken_pasid(iommu->ecap)))
c83b2f20 516#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
ae853ddb 517 pasid_enabled(iommu))
ba395927 518
c0771df8
DW
519int intel_iommu_gfx_mapped;
520EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
521
ba395927
KA
522#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
523static DEFINE_SPINLOCK(device_domain_lock);
524static LIST_HEAD(device_domain_list);
525
b22f6434 526static const struct iommu_ops intel_iommu_ops;
a8bcbb0d 527
4158c2ec
JR
528static bool translation_pre_enabled(struct intel_iommu *iommu)
529{
530 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
531}
532
091d42e4
JR
533static void clear_translation_pre_enabled(struct intel_iommu *iommu)
534{
535 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
536}
537
4158c2ec
JR
538static void init_translation_status(struct intel_iommu *iommu)
539{
540 u32 gsts;
541
542 gsts = readl(iommu->reg + DMAR_GSTS_REG);
543 if (gsts & DMA_GSTS_TES)
544 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
545}
546
00a77deb
JR
547/* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
548static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
549{
550 return container_of(dom, struct dmar_domain, domain);
551}
552
ba395927
KA
553static int __init intel_iommu_setup(char *str)
554{
555 if (!str)
556 return -EINVAL;
557 while (*str) {
0cd5c3c8
KM
558 if (!strncmp(str, "on", 2)) {
559 dmar_disabled = 0;
9f10e5bf 560 pr_info("IOMMU enabled\n");
0cd5c3c8 561 } else if (!strncmp(str, "off", 3)) {
ba395927 562 dmar_disabled = 1;
9f10e5bf 563 pr_info("IOMMU disabled\n");
ba395927
KA
564 } else if (!strncmp(str, "igfx_off", 8)) {
565 dmar_map_gfx = 0;
9f10e5bf 566 pr_info("Disable GFX device mapping\n");
7d3b03ce 567 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 568 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 569 dmar_forcedac = 1;
5e0d2a6f 570 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 571 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 572 intel_iommu_strict = 1;
6dd9a7c7 573 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 574 pr_info("Disable supported super page\n");
6dd9a7c7 575 intel_iommu_superpage = 0;
c83b2f20
DW
576 } else if (!strncmp(str, "ecs_off", 7)) {
577 printk(KERN_INFO
578 "Intel-IOMMU: disable extended context table support\n");
579 intel_iommu_ecs = 0;
ae853ddb
DW
580 } else if (!strncmp(str, "pasid28", 7)) {
581 printk(KERN_INFO
582 "Intel-IOMMU: enable pre-production PASID support\n");
583 intel_iommu_pasid28 = 1;
584 iommu_identity_mapping |= IDENTMAP_GFX;
ba395927
KA
585 }
586
587 str += strcspn(str, ",");
588 while (*str == ',')
589 str++;
590 }
591 return 0;
592}
593__setup("intel_iommu=", intel_iommu_setup);
594
595static struct kmem_cache *iommu_domain_cache;
596static struct kmem_cache *iommu_devinfo_cache;
ba395927 597
9452d5bf
JR
598static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
599{
8bf47816
JR
600 struct dmar_domain **domains;
601 int idx = did >> 8;
602
603 domains = iommu->domains[idx];
604 if (!domains)
605 return NULL;
606
607 return domains[did & 0xff];
9452d5bf
JR
608}
609
610static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
611 struct dmar_domain *domain)
612{
8bf47816
JR
613 struct dmar_domain **domains;
614 int idx = did >> 8;
615
616 if (!iommu->domains[idx]) {
617 size_t size = 256 * sizeof(struct dmar_domain *);
618 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
619 }
620
621 domains = iommu->domains[idx];
622 if (WARN_ON(!domains))
623 return;
624 else
625 domains[did & 0xff] = domain;
9452d5bf
JR
626}
627
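/*
 * Example of the two-level lookup above: domain id 0x1234 lives in
 * iommu->domains[0x12][0x34]; the 256-entry second-level array for index
 * 0x12 is only allocated the first time a domain id in the 0x1200-0x12ff
 * range is used.
 */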
4c923d47 628static inline void *alloc_pgtable_page(int node)
eb3fa7cb 629{
4c923d47
SS
630 struct page *page;
631 void *vaddr = NULL;
eb3fa7cb 632
4c923d47
SS
633 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
634 if (page)
635 vaddr = page_address(page);
eb3fa7cb 636 return vaddr;
ba395927
KA
637}
638
639static inline void free_pgtable_page(void *vaddr)
640{
641 free_page((unsigned long)vaddr);
642}
643
644static inline void *alloc_domain_mem(void)
645{
354bb65e 646 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
647}
648
38717946 649static void free_domain_mem(void *vaddr)
ba395927
KA
650{
651 kmem_cache_free(iommu_domain_cache, vaddr);
652}
653
654static inline void *alloc_devinfo_mem(void)
655{
354bb65e 656 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
657}
658
659static inline void free_devinfo_mem(void *vaddr)
660{
661 kmem_cache_free(iommu_devinfo_cache, vaddr);
662}
663
ab8dfe25
JL
664static inline int domain_type_is_vm(struct dmar_domain *domain)
665{
666 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
667}
668
28ccce0d
JR
669static inline int domain_type_is_si(struct dmar_domain *domain)
670{
671 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
672}
673
ab8dfe25
JL
674static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
675{
676 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
677 DOMAIN_FLAG_STATIC_IDENTITY);
678}
1b573683 679
162d1b10
JL
680static inline int domain_pfn_supported(struct dmar_domain *domain,
681 unsigned long pfn)
682{
683 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
684
685 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
686}
687
4ed0d3e6 688static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
689{
690 unsigned long sagaw;
691 int agaw = -1;
692
693 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 694 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
695 agaw >= 0; agaw--) {
696 if (test_bit(agaw, &sagaw))
697 break;
698 }
699
700 return agaw;
701}
702
4ed0d3e6
FY
703/*
704 * Calculate max SAGAW for each iommu.
705 */
706int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
707{
708 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
709}
710
711/*
712 * calculate agaw for each iommu.
713 * "SAGAW" may be different across iommus, use a default agaw, and
714 * fall back to a smaller supported agaw for iommus that don't support the default.
715 */
716int iommu_calculate_agaw(struct intel_iommu *iommu)
717{
718 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
719}
720
2c2e2c38 721/* This function only returns a single iommu in a domain */
8c11e798
WH
722static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
723{
724 int iommu_id;
725
2c2e2c38 726 /* si_domain and vm domain should not get here. */
ab8dfe25 727 BUG_ON(domain_type_is_vm_or_si(domain));
29a27719
JR
728 for_each_domain_iommu(iommu_id, domain)
729 break;
730
8c11e798
WH
731 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
732 return NULL;
733
734 return g_iommus[iommu_id];
735}
736
8e604097
WH
737static void domain_update_iommu_coherency(struct dmar_domain *domain)
738{
d0501960
DW
739 struct dmar_drhd_unit *drhd;
740 struct intel_iommu *iommu;
2f119c78
QL
741 bool found = false;
742 int i;
2e12bc29 743
d0501960 744 domain->iommu_coherency = 1;
8e604097 745
29a27719 746 for_each_domain_iommu(i, domain) {
2f119c78 747 found = true;
8e604097
WH
748 if (!ecap_coherent(g_iommus[i]->ecap)) {
749 domain->iommu_coherency = 0;
750 break;
751 }
8e604097 752 }
d0501960
DW
753 if (found)
754 return;
755
756 /* No hardware attached; use lowest common denominator */
757 rcu_read_lock();
758 for_each_active_iommu(iommu, drhd) {
759 if (!ecap_coherent(iommu->ecap)) {
760 domain->iommu_coherency = 0;
761 break;
762 }
763 }
764 rcu_read_unlock();
8e604097
WH
765}
766
161f6934 767static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 768{
161f6934
JL
769 struct dmar_drhd_unit *drhd;
770 struct intel_iommu *iommu;
771 int ret = 1;
58c610bd 772
161f6934
JL
773 rcu_read_lock();
774 for_each_active_iommu(iommu, drhd) {
775 if (iommu != skip) {
776 if (!ecap_sc_support(iommu->ecap)) {
777 ret = 0;
778 break;
779 }
58c610bd 780 }
58c610bd 781 }
161f6934
JL
782 rcu_read_unlock();
783
784 return ret;
58c610bd
SY
785}
786
161f6934 787static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 788{
8140a95d 789 struct dmar_drhd_unit *drhd;
161f6934 790 struct intel_iommu *iommu;
8140a95d 791 int mask = 0xf;
6dd9a7c7
YS
792
793 if (!intel_iommu_superpage) {
161f6934 794 return 0;
6dd9a7c7
YS
795 }
796
8140a95d 797 /* set iommu_superpage to the smallest common denominator */
0e242612 798 rcu_read_lock();
8140a95d 799 for_each_active_iommu(iommu, drhd) {
161f6934
JL
800 if (iommu != skip) {
801 mask &= cap_super_page_val(iommu->cap);
802 if (!mask)
803 break;
6dd9a7c7
YS
804 }
805 }
0e242612
JL
806 rcu_read_unlock();
807
161f6934 808 return fls(mask);
6dd9a7c7
YS
809}
810
58c610bd
SY
811/* Some capabilities may be different across iommus */
812static void domain_update_iommu_cap(struct dmar_domain *domain)
813{
814 domain_update_iommu_coherency(domain);
161f6934
JL
815 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
816 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
817}
818
03ecc32c
DW
819static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
820 u8 bus, u8 devfn, int alloc)
821{
822 struct root_entry *root = &iommu->root_entry[bus];
823 struct context_entry *context;
824 u64 *entry;
825
4df4eab1 826 entry = &root->lo;
c83b2f20 827 if (ecs_enabled(iommu)) {
03ecc32c
DW
828 if (devfn >= 0x80) {
829 devfn -= 0x80;
830 entry = &root->hi;
831 }
832 devfn *= 2;
833 }
03ecc32c
DW
834 if (*entry & 1)
835 context = phys_to_virt(*entry & VTD_PAGE_MASK);
836 else {
837 unsigned long phy_addr;
838 if (!alloc)
839 return NULL;
840
841 context = alloc_pgtable_page(iommu->node);
842 if (!context)
843 return NULL;
844
845 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
846 phy_addr = virt_to_phys((void *)context);
847 *entry = phy_addr | 1;
848 __iommu_flush_cache(iommu, entry, sizeof(*entry));
849 }
850 return &context[devfn];
851}
852
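/*
 * Example of the indexing above: in legacy mode, (bus 0, devfn 0x9b)
 * selects &context[0x9b] in the table referenced by root->lo.  With ECS
 * enabled, devfn 0x9b >= 0x80 switches to root->hi, is folded to 0x1b and
 * doubled (extended entries are 256 bits), so it selects &context[0x36].
 */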
4ed6a540
DW
853static int iommu_dummy(struct device *dev)
854{
855 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
856}
857
156baca8 858static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
859{
860 struct dmar_drhd_unit *drhd = NULL;
b683b230 861 struct intel_iommu *iommu;
156baca8
DW
862 struct device *tmp;
863 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 864 u16 segment = 0;
c7151a8d
WH
865 int i;
866
4ed6a540
DW
867 if (iommu_dummy(dev))
868 return NULL;
869
156baca8
DW
870 if (dev_is_pci(dev)) {
871 pdev = to_pci_dev(dev);
872 segment = pci_domain_nr(pdev->bus);
ca5b74d2 873 } else if (has_acpi_companion(dev))
156baca8
DW
874 dev = &ACPI_COMPANION(dev)->dev;
875
0e242612 876 rcu_read_lock();
b683b230 877 for_each_active_iommu(iommu, drhd) {
156baca8 878 if (pdev && segment != drhd->segment)
276dbf99 879 continue;
c7151a8d 880
b683b230 881 for_each_active_dev_scope(drhd->devices,
156baca8
DW
882 drhd->devices_cnt, i, tmp) {
883 if (tmp == dev) {
884 *bus = drhd->devices[i].bus;
885 *devfn = drhd->devices[i].devfn;
b683b230 886 goto out;
156baca8
DW
887 }
888
889 if (!pdev || !dev_is_pci(tmp))
890 continue;
891
892 ptmp = to_pci_dev(tmp);
893 if (ptmp->subordinate &&
894 ptmp->subordinate->number <= pdev->bus->number &&
895 ptmp->subordinate->busn_res.end >= pdev->bus->number)
896 goto got_pdev;
924b6231 897 }
c7151a8d 898
156baca8
DW
899 if (pdev && drhd->include_all) {
900 got_pdev:
901 *bus = pdev->bus->number;
902 *devfn = pdev->devfn;
b683b230 903 goto out;
156baca8 904 }
c7151a8d 905 }
b683b230 906 iommu = NULL;
156baca8 907 out:
0e242612 908 rcu_read_unlock();
c7151a8d 909
b683b230 910 return iommu;
c7151a8d
WH
911}
912
5331fe6f
WH
913static void domain_flush_cache(struct dmar_domain *domain,
914 void *addr, int size)
915{
916 if (!domain->iommu_coherency)
917 clflush_cache_range(addr, size);
918}
919
ba395927
KA
920static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
921{
ba395927 922 struct context_entry *context;
03ecc32c 923 int ret = 0;
ba395927
KA
924 unsigned long flags;
925
926 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
927 context = iommu_context_addr(iommu, bus, devfn, 0);
928 if (context)
929 ret = context_present(context);
ba395927
KA
930 spin_unlock_irqrestore(&iommu->lock, flags);
931 return ret;
932}
933
934static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
935{
ba395927
KA
936 struct context_entry *context;
937 unsigned long flags;
938
939 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c 940 context = iommu_context_addr(iommu, bus, devfn, 0);
ba395927 941 if (context) {
03ecc32c
DW
942 context_clear_entry(context);
943 __iommu_flush_cache(iommu, context, sizeof(*context));
ba395927
KA
944 }
945 spin_unlock_irqrestore(&iommu->lock, flags);
946}
947
948static void free_context_table(struct intel_iommu *iommu)
949{
ba395927
KA
950 int i;
951 unsigned long flags;
952 struct context_entry *context;
953
954 spin_lock_irqsave(&iommu->lock, flags);
955 if (!iommu->root_entry) {
956 goto out;
957 }
958 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 959 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
960 if (context)
961 free_pgtable_page(context);
03ecc32c 962
c83b2f20 963 if (!ecs_enabled(iommu))
03ecc32c
DW
964 continue;
965
966 context = iommu_context_addr(iommu, i, 0x80, 0);
967 if (context)
968 free_pgtable_page(context);
969
ba395927
KA
970 }
971 free_pgtable_page(iommu->root_entry);
972 iommu->root_entry = NULL;
973out:
974 spin_unlock_irqrestore(&iommu->lock, flags);
975}
976
b026fd28 977static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 978 unsigned long pfn, int *target_level)
ba395927 979{
ba395927
KA
980 struct dma_pte *parent, *pte = NULL;
981 int level = agaw_to_level(domain->agaw);
4399c8bf 982 int offset;
ba395927
KA
983
984 BUG_ON(!domain->pgd);
f9423606 985
162d1b10 986 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
987 /* Address beyond IOMMU's addressing capabilities. */
988 return NULL;
989
ba395927
KA
990 parent = domain->pgd;
991
5cf0a76f 992 while (1) {
ba395927
KA
993 void *tmp_page;
994
b026fd28 995 offset = pfn_level_offset(pfn, level);
ba395927 996 pte = &parent[offset];
5cf0a76f 997 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 998 break;
5cf0a76f 999 if (level == *target_level)
ba395927
KA
1000 break;
1001
19c239ce 1002 if (!dma_pte_present(pte)) {
c85994e4
DW
1003 uint64_t pteval;
1004
4c923d47 1005 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 1006
206a73c1 1007 if (!tmp_page)
ba395927 1008 return NULL;
206a73c1 1009
c85994e4 1010 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 1011 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 1012 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
1013 /* Someone else set it while we were thinking; use theirs. */
1014 free_pgtable_page(tmp_page);
effad4b5 1015 else
c85994e4 1016 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 1017 }
5cf0a76f
DW
1018 if (level == 1)
1019 break;
1020
19c239ce 1021 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1022 level--;
1023 }
1024
5cf0a76f
DW
1025 if (!*target_level)
1026 *target_level = level;
1027
ba395927
KA
1028 return pte;
1029}
1030
6dd9a7c7 1031
ba395927 1032/* return address's pte at specific level */
90dcfb5e
DW
1033static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1034 unsigned long pfn,
6dd9a7c7 1035 int level, int *large_page)
ba395927
KA
1036{
1037 struct dma_pte *parent, *pte = NULL;
1038 int total = agaw_to_level(domain->agaw);
1039 int offset;
1040
1041 parent = domain->pgd;
1042 while (level <= total) {
90dcfb5e 1043 offset = pfn_level_offset(pfn, total);
ba395927
KA
1044 pte = &parent[offset];
1045 if (level == total)
1046 return pte;
1047
6dd9a7c7
YS
1048 if (!dma_pte_present(pte)) {
1049 *large_page = total;
ba395927 1050 break;
6dd9a7c7
YS
1051 }
1052
e16922af 1053 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
1054 *large_page = total;
1055 return pte;
1056 }
1057
19c239ce 1058 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1059 total--;
1060 }
1061 return NULL;
1062}
1063
ba395927 1064/* clear last level pte, a tlb flush should follow */
5cf0a76f 1065static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1066 unsigned long start_pfn,
1067 unsigned long last_pfn)
ba395927 1068{
6dd9a7c7 1069 unsigned int large_page = 1;
310a5ab9 1070 struct dma_pte *first_pte, *pte;
66eae846 1071
162d1b10
JL
1072 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1073 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1074 BUG_ON(start_pfn > last_pfn);
ba395927 1075
04b18e65 1076 /* we don't need lock here; nobody else touches the iova range */
59c36286 1077 do {
6dd9a7c7
YS
1078 large_page = 1;
1079 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1080 if (!pte) {
6dd9a7c7 1081 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1082 continue;
1083 }
6dd9a7c7 1084 do {
310a5ab9 1085 dma_clear_pte(pte);
6dd9a7c7 1086 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1087 pte++;
75e6bf96
DW
1088 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1089
310a5ab9
DW
1090 domain_flush_cache(domain, first_pte,
1091 (void *)pte - (void *)first_pte);
59c36286
DW
1092
1093 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1094}
1095
3269ee0b
AW
1096static void dma_pte_free_level(struct dmar_domain *domain, int level,
1097 struct dma_pte *pte, unsigned long pfn,
1098 unsigned long start_pfn, unsigned long last_pfn)
1099{
1100 pfn = max(start_pfn, pfn);
1101 pte = &pte[pfn_level_offset(pfn, level)];
1102
1103 do {
1104 unsigned long level_pfn;
1105 struct dma_pte *level_pte;
1106
1107 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1108 goto next;
1109
1110 level_pfn = pfn & level_mask(level - 1);
1111 level_pte = phys_to_virt(dma_pte_addr(pte));
1112
1113 if (level > 2)
1114 dma_pte_free_level(domain, level - 1, level_pte,
1115 level_pfn, start_pfn, last_pfn);
1116
1117 /* If range covers entire pagetable, free it */
1118 if (!(start_pfn > level_pfn ||
08336fd2 1119 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1120 dma_clear_pte(pte);
1121 domain_flush_cache(domain, pte, sizeof(*pte));
1122 free_pgtable_page(level_pte);
1123 }
1124next:
1125 pfn += level_size(level);
1126 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1127}
1128
ba395927
KA
1129/* free page table pages. last level pte should already be cleared */
1130static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
1131 unsigned long start_pfn,
1132 unsigned long last_pfn)
ba395927 1133{
162d1b10
JL
1134 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1135 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1136 BUG_ON(start_pfn > last_pfn);
ba395927 1137
d41a4adb
JL
1138 dma_pte_clear_range(domain, start_pfn, last_pfn);
1139
f3a0a52f 1140 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
1141 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1142 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1143
ba395927 1144 /* free pgd */
d794dc9b 1145 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1146 free_pgtable_page(domain->pgd);
1147 domain->pgd = NULL;
1148 }
1149}
1150
ea8ea460
DW
1151/* When a page at a given level is being unlinked from its parent, we don't
1152 need to *modify* it at all. All we need to do is make a list of all the
1153 pages which can be freed just as soon as we've flushed the IOTLB and we
1154 know the hardware page-walk will no longer touch them.
1155 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1156 be freed. */
1157static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1158 int level, struct dma_pte *pte,
1159 struct page *freelist)
1160{
1161 struct page *pg;
1162
1163 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1164 pg->freelist = freelist;
1165 freelist = pg;
1166
1167 if (level == 1)
1168 return freelist;
1169
adeb2590
JL
1170 pte = page_address(pg);
1171 do {
ea8ea460
DW
1172 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1173 freelist = dma_pte_list_pagetables(domain, level - 1,
1174 pte, freelist);
adeb2590
JL
1175 pte++;
1176 } while (!first_pte_in_page(pte));
ea8ea460
DW
1177
1178 return freelist;
1179}
1180
1181static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1182 struct dma_pte *pte, unsigned long pfn,
1183 unsigned long start_pfn,
1184 unsigned long last_pfn,
1185 struct page *freelist)
1186{
1187 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1188
1189 pfn = max(start_pfn, pfn);
1190 pte = &pte[pfn_level_offset(pfn, level)];
1191
1192 do {
1193 unsigned long level_pfn;
1194
1195 if (!dma_pte_present(pte))
1196 goto next;
1197
1198 level_pfn = pfn & level_mask(level);
1199
1200 /* If range covers entire pagetable, free it */
1201 if (start_pfn <= level_pfn &&
1202 last_pfn >= level_pfn + level_size(level) - 1) {
1203 /* These subordinate page tables are going away entirely. Don't
1204 bother to clear them; we're just going to *free* them. */
1205 if (level > 1 && !dma_pte_superpage(pte))
1206 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1207
1208 dma_clear_pte(pte);
1209 if (!first_pte)
1210 first_pte = pte;
1211 last_pte = pte;
1212 } else if (level > 1) {
1213 /* Recurse down into a level that isn't *entirely* obsolete */
1214 freelist = dma_pte_clear_level(domain, level - 1,
1215 phys_to_virt(dma_pte_addr(pte)),
1216 level_pfn, start_pfn, last_pfn,
1217 freelist);
1218 }
1219next:
1220 pfn += level_size(level);
1221 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1222
1223 if (first_pte)
1224 domain_flush_cache(domain, first_pte,
1225 (void *)++last_pte - (void *)first_pte);
1226
1227 return freelist;
1228}
1229
1230/* We can't just free the pages because the IOMMU may still be walking
1231 the page tables, and may have cached the intermediate levels. The
1232 pages can only be freed after the IOTLB flush has been done. */
b690420a
JR
1233static struct page *domain_unmap(struct dmar_domain *domain,
1234 unsigned long start_pfn,
1235 unsigned long last_pfn)
ea8ea460 1236{
ea8ea460
DW
1237 struct page *freelist = NULL;
1238
162d1b10
JL
1239 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1240 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1241 BUG_ON(start_pfn > last_pfn);
1242
1243 /* we don't need lock here; nobody else touches the iova range */
1244 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1245 domain->pgd, 0, start_pfn, last_pfn, NULL);
1246
1247 /* free pgd */
1248 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1249 struct page *pgd_page = virt_to_page(domain->pgd);
1250 pgd_page->freelist = freelist;
1251 freelist = pgd_page;
1252
1253 domain->pgd = NULL;
1254 }
1255
1256 return freelist;
1257}
1258
b690420a 1259static void dma_free_pagelist(struct page *freelist)
ea8ea460
DW
1260{
1261 struct page *pg;
1262
1263 while ((pg = freelist)) {
1264 freelist = pg->freelist;
1265 free_pgtable_page(page_address(pg));
1266 }
1267}
1268
ba395927
KA
1269/* iommu handling */
1270static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1271{
1272 struct root_entry *root;
1273 unsigned long flags;
1274
4c923d47 1275 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1276 if (!root) {
9f10e5bf 1277 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1278 iommu->name);
ba395927 1279 return -ENOMEM;
ffebeb46 1280 }
ba395927 1281
5b6985ce 1282 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1283
1284 spin_lock_irqsave(&iommu->lock, flags);
1285 iommu->root_entry = root;
1286 spin_unlock_irqrestore(&iommu->lock, flags);
1287
1288 return 0;
1289}
1290
ba395927
KA
1291static void iommu_set_root_entry(struct intel_iommu *iommu)
1292{
03ecc32c 1293 u64 addr;
c416daa9 1294 u32 sts;
ba395927
KA
1295 unsigned long flag;
1296
03ecc32c 1297 addr = virt_to_phys(iommu->root_entry);
c83b2f20 1298 if (ecs_enabled(iommu))
03ecc32c 1299 addr |= DMA_RTADDR_RTT;
ba395927 1300
1f5b3c3f 1301 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1302 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1303
c416daa9 1304 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1305
1306 /* Make sure hardware complete it */
1307 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1308 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1309
1f5b3c3f 1310 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1311}
1312
1313static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1314{
1315 u32 val;
1316 unsigned long flag;
1317
9af88143 1318 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1319 return;
ba395927 1320
1f5b3c3f 1321 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1322 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1323
1324 /* Make sure hardware complete it */
1325 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1326 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1327
1f5b3c3f 1328 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1329}
1330
1331/* return value determines whether we need a write buffer flush */
4c25a2c1
DW
1332static void __iommu_flush_context(struct intel_iommu *iommu,
1333 u16 did, u16 source_id, u8 function_mask,
1334 u64 type)
ba395927
KA
1335{
1336 u64 val = 0;
1337 unsigned long flag;
1338
ba395927
KA
1339 switch (type) {
1340 case DMA_CCMD_GLOBAL_INVL:
1341 val = DMA_CCMD_GLOBAL_INVL;
1342 break;
1343 case DMA_CCMD_DOMAIN_INVL:
1344 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1345 break;
1346 case DMA_CCMD_DEVICE_INVL:
1347 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1348 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1349 break;
1350 default:
1351 BUG();
1352 }
1353 val |= DMA_CCMD_ICC;
1354
1f5b3c3f 1355 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1356 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1357
1358 /* Make sure hardware complete it */
1359 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1360 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1361
1f5b3c3f 1362 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1363}
1364
ba395927 1365/* return value determines whether we need a write buffer flush */
1f0ef2aa
DW
1366static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1367 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1368{
1369 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1370 u64 val = 0, val_iva = 0;
1371 unsigned long flag;
1372
ba395927
KA
1373 switch (type) {
1374 case DMA_TLB_GLOBAL_FLUSH:
1375 /* global flush doesn't need set IVA_REG */
1376 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1377 break;
1378 case DMA_TLB_DSI_FLUSH:
1379 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1380 break;
1381 case DMA_TLB_PSI_FLUSH:
1382 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1383 /* IH bit is passed in as part of address */
ba395927
KA
1384 val_iva = size_order | addr;
1385 break;
1386 default:
1387 BUG();
1388 }
1389 /* Note: set drain read/write */
1390#if 0
1391 /*
1392 * This is probably just to be extra safe. It looks like we can
1393 * ignore it without any impact.
1394 */
1395 if (cap_read_drain(iommu->cap))
1396 val |= DMA_TLB_READ_DRAIN;
1397#endif
1398 if (cap_write_drain(iommu->cap))
1399 val |= DMA_TLB_WRITE_DRAIN;
1400
1f5b3c3f 1401 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1402 /* Note: Only uses first TLB reg currently */
1403 if (val_iva)
1404 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1405 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1406
1407 /* Make sure hardware complete it */
1408 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1409 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1410
1f5b3c3f 1411 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1412
1413 /* check IOTLB invalidation granularity */
1414 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1415 pr_err("Flush IOTLB failed\n");
ba395927 1416 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1417 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1418 (unsigned long long)DMA_TLB_IIRG(type),
1419 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1420}
1421
64ae892b
DW
1422static struct device_domain_info *
1423iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1424 u8 bus, u8 devfn)
93a23a72 1425{
93a23a72 1426 struct device_domain_info *info;
93a23a72 1427
55d94043
JR
1428 assert_spin_locked(&device_domain_lock);
1429
93a23a72
YZ
1430 if (!iommu->qi)
1431 return NULL;
1432
93a23a72 1433 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1434 if (info->iommu == iommu && info->bus == bus &&
1435 info->devfn == devfn) {
b16d0cb9
DW
1436 if (info->ats_supported && info->dev)
1437 return info;
93a23a72
YZ
1438 break;
1439 }
93a23a72 1440
b16d0cb9 1441 return NULL;
93a23a72
YZ
1442}
1443
1444static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1445{
fb0cc3aa
BH
1446 struct pci_dev *pdev;
1447
0bcb3e28 1448 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1449 return;
1450
fb0cc3aa 1451 pdev = to_pci_dev(info->dev);
fb0cc3aa 1452
b16d0cb9
DW
1453#ifdef CONFIG_INTEL_IOMMU_SVM
1454 /* The PCIe spec, in its wisdom, declares that the behaviour of
1455 the device if you enable PASID support after ATS support is
1456 undefined. So always enable PASID support on devices which
1457 have it, even if we can't yet know if we're ever going to
1458 use it. */
1459 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1460 info->pasid_enabled = 1;
1461
1462 if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1463 info->pri_enabled = 1;
1464#endif
1465 if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1466 info->ats_enabled = 1;
1467 info->ats_qdep = pci_ats_queue_depth(pdev);
1468 }
93a23a72
YZ
1469}
1470
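/*
 * Condensed sketch of the ordering requirement documented above: PASID
 * (and PRI) must be enabled before ATS.  It simply mirrors
 * iommu_enable_dev_iotlb() using the pci_* helpers this file already
 * includes; the helper name is made up, and return values and capability
 * checks are trimmed, so treat it as illustration rather than real code.
 */
static void example_enable_dev_features(struct pci_dev *pdev, int pasid_features)
{
	pci_enable_pasid(pdev, pasid_features);	/* 1. PASID first: enabling it
						 *    after ATS is undefined	*/
	pci_reset_pri(pdev);			/* 2. then PRI			*/
	pci_enable_pri(pdev, 32);
	pci_enable_ats(pdev, VTD_PAGE_SHIFT);	/* 3. ATS last			*/
}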
1471static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1472{
b16d0cb9
DW
1473 struct pci_dev *pdev;
1474
1475 if (!dev_is_pci(info->dev))
93a23a72
YZ
1476 return;
1477
b16d0cb9
DW
1478 pdev = to_pci_dev(info->dev);
1479
1480 if (info->ats_enabled) {
1481 pci_disable_ats(pdev);
1482 info->ats_enabled = 0;
1483 }
1484#ifdef CONFIG_INTEL_IOMMU_SVM
1485 if (info->pri_enabled) {
1486 pci_disable_pri(pdev);
1487 info->pri_enabled = 0;
1488 }
1489 if (info->pasid_enabled) {
1490 pci_disable_pasid(pdev);
1491 info->pasid_enabled = 0;
1492 }
1493#endif
93a23a72
YZ
1494}
1495
1496static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1497 u64 addr, unsigned mask)
1498{
1499 u16 sid, qdep;
1500 unsigned long flags;
1501 struct device_domain_info *info;
1502
1503 spin_lock_irqsave(&device_domain_lock, flags);
1504 list_for_each_entry(info, &domain->devices, link) {
b16d0cb9 1505 if (!info->ats_enabled)
93a23a72
YZ
1506 continue;
1507
1508 sid = info->bus << 8 | info->devfn;
b16d0cb9 1509 qdep = info->ats_qdep;
93a23a72
YZ
1510 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1511 }
1512 spin_unlock_irqrestore(&device_domain_lock, flags);
1513}
1514
a1ddcbe9
JR
1515static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1516 struct dmar_domain *domain,
1517 unsigned long pfn, unsigned int pages,
1518 int ih, int map)
ba395927 1519{
9dd2fe89 1520 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1521 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1522 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1523
ba395927
KA
1524 BUG_ON(pages == 0);
1525
ea8ea460
DW
1526 if (ih)
1527 ih = 1 << 6;
ba395927 1528 /*
9dd2fe89
YZ
1529 * Fallback to domain selective flush if no PSI support or the size is
1530 * too big.
ba395927
KA
1531 * PSI requires page size to be 2 ^ x, and the base address is naturally
1532 * aligned to the size
1533 */
9dd2fe89
YZ
1534 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1535 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1536 DMA_TLB_DSI_FLUSH);
9dd2fe89 1537 else
ea8ea460 1538 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1539 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1540
1541 /*
82653633
NA
1542 * In caching mode, changes of pages from non-present to present require
1543 * a flush. However, the device IOTLB does not need to be flushed in this case.
bf92df30 1544 */
82653633 1545 if (!cap_caching_mode(iommu->cap) || !map)
9452d5bf
JR
1546 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1547 addr, mask);
ba395927
KA
1548}
1549
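/*
 * Worked example for the mask computation above: pages = 1 gives mask 0
 * (one 4KiB page), pages = 2 gives mask 1, and pages = 3 is first rounded
 * up to 4 so mask = 2, i.e. the PSI covers 4 pages.  If mask exceeds
 * cap_max_amask_val(), the code falls back to a domain-selective flush.
 */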
f8bab735 1550static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1551{
1552 u32 pmen;
1553 unsigned long flags;
1554
1f5b3c3f 1555 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1556 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1557 pmen &= ~DMA_PMEN_EPM;
1558 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1559
1560 /* wait for the protected region status bit to clear */
1561 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1562 readl, !(pmen & DMA_PMEN_PRS), pmen);
1563
1f5b3c3f 1564 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1565}
1566
2a41ccee 1567static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1568{
1569 u32 sts;
1570 unsigned long flags;
1571
1f5b3c3f 1572 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1573 iommu->gcmd |= DMA_GCMD_TE;
1574 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1575
1576 /* Make sure hardware complete it */
1577 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1578 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1579
1f5b3c3f 1580 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1581}
1582
2a41ccee 1583static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1584{
1585 u32 sts;
1586 unsigned long flag;
1587
1f5b3c3f 1588 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1589 iommu->gcmd &= ~DMA_GCMD_TE;
1590 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1591
1592 /* Make sure hardware complete it */
1593 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1594 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1595
1f5b3c3f 1596 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1597}
1598
3460a6d9 1599
ba395927
KA
1600static int iommu_init_domains(struct intel_iommu *iommu)
1601{
8bf47816
JR
1602 u32 ndomains, nlongs;
1603 size_t size;
ba395927
KA
1604
1605 ndomains = cap_ndoms(iommu->cap);
8bf47816 1606 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1607 iommu->name, ndomains);
ba395927
KA
1608 nlongs = BITS_TO_LONGS(ndomains);
1609
94a91b50
DD
1610 spin_lock_init(&iommu->lock);
1611
ba395927
KA
1612 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1613 if (!iommu->domain_ids) {
9f10e5bf
JR
1614 pr_err("%s: Allocating domain id array failed\n",
1615 iommu->name);
ba395927
KA
1616 return -ENOMEM;
1617 }
8bf47816
JR
1618
1619 size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
1620 iommu->domains = kzalloc(size, GFP_KERNEL);
1621
1622 if (iommu->domains) {
1623 size = 256 * sizeof(struct dmar_domain *);
1624 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1625 }
1626
1627 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1628 pr_err("%s: Allocating domain array failed\n",
1629 iommu->name);
852bdb04 1630 kfree(iommu->domain_ids);
8bf47816 1631 kfree(iommu->domains);
852bdb04 1632 iommu->domain_ids = NULL;
8bf47816 1633 iommu->domains = NULL;
ba395927
KA
1634 return -ENOMEM;
1635 }
1636
8bf47816
JR
1637
1638
ba395927 1639 /*
c0e8a6c8
JR
1640 * If Caching mode is set, then invalid translations are tagged
1641 * with domain-id 0, hence we need to pre-allocate it. We also
1642 * use domain-id 0 as a marker for non-allocated domain-id, so
1643 * make sure it is not used for a real domain.
ba395927 1644 */
c0e8a6c8
JR
1645 set_bit(0, iommu->domain_ids);
1646
ba395927
KA
1647 return 0;
1648}
ba395927 1649
ffebeb46 1650static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1651{
29a27719 1652 struct device_domain_info *info, *tmp;
55d94043 1653 unsigned long flags;
ba395927 1654
29a27719
JR
1655 if (!iommu->domains || !iommu->domain_ids)
1656 return;
a4eaa86c 1657
55d94043 1658 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1659 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1660 struct dmar_domain *domain;
1661
1662 if (info->iommu != iommu)
1663 continue;
1664
1665 if (!info->dev || !info->domain)
1666 continue;
1667
1668 domain = info->domain;
1669
e6de0f8d 1670 dmar_remove_one_dev_info(domain, info->dev);
29a27719
JR
1671
1672 if (!domain_type_is_vm_or_si(domain))
1673 domain_exit(domain);
ba395927 1674 }
55d94043 1675 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1676
1677 if (iommu->gcmd & DMA_GCMD_TE)
1678 iommu_disable_translation(iommu);
ffebeb46 1679}
ba395927 1680
ffebeb46
JL
1681static void free_dmar_iommu(struct intel_iommu *iommu)
1682{
1683 if ((iommu->domains) && (iommu->domain_ids)) {
8bf47816
JR
1684 int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
1685 int i;
1686
1687 for (i = 0; i < elems; i++)
1688 kfree(iommu->domains[i]);
ffebeb46
JL
1689 kfree(iommu->domains);
1690 kfree(iommu->domain_ids);
1691 iommu->domains = NULL;
1692 iommu->domain_ids = NULL;
1693 }
ba395927 1694
d9630fe9
WH
1695 g_iommus[iommu->seq_id] = NULL;
1696
ba395927
KA
1697 /* free context mapping */
1698 free_context_table(iommu);
8a94ade4
DW
1699
1700#ifdef CONFIG_INTEL_IOMMU_SVM
1701 if (pasid_enabled(iommu))
1702 intel_svm_free_pasid_tables(iommu);
1703#endif
ba395927
KA
1704}
1705
ab8dfe25 1706static struct dmar_domain *alloc_domain(int flags)
ba395927 1707{
ba395927 1708 struct dmar_domain *domain;
ba395927
KA
1709
1710 domain = alloc_domain_mem();
1711 if (!domain)
1712 return NULL;
1713
ab8dfe25 1714 memset(domain, 0, sizeof(*domain));
4c923d47 1715 domain->nid = -1;
ab8dfe25 1716 domain->flags = flags;
92d03cc8 1717 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1718
1719 return domain;
1720}
1721
d160aca5
JR
1722/* Must be called with iommu->lock */
1723static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1724 struct intel_iommu *iommu)
1725{
44bde614 1726 unsigned long ndomains;
55d94043 1727 int num;
44bde614 1728
55d94043 1729 assert_spin_locked(&device_domain_lock);
d160aca5 1730 assert_spin_locked(&iommu->lock);
ba395927 1731
29a27719
JR
1732 domain->iommu_refcnt[iommu->seq_id] += 1;
1733 domain->iommu_count += 1;
1734 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
fb170fb4 1735 ndomains = cap_ndoms(iommu->cap);
d160aca5
JR
1736 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1737
1738 if (num >= ndomains) {
1739 pr_err("%s: No free domain ids\n", iommu->name);
1740 domain->iommu_refcnt[iommu->seq_id] -= 1;
1741 domain->iommu_count -= 1;
55d94043 1742 return -ENOSPC;
2c2e2c38 1743 }
ba395927 1744
d160aca5
JR
1745 set_bit(num, iommu->domain_ids);
1746 set_iommu_domain(iommu, num, domain);
1747
1748 domain->iommu_did[iommu->seq_id] = num;
1749 domain->nid = iommu->node;
fb170fb4 1750
fb170fb4
JL
1751 domain_update_iommu_cap(domain);
1752 }
d160aca5 1753
55d94043 1754 return 0;
fb170fb4
JL
1755}
1756
1757static int domain_detach_iommu(struct dmar_domain *domain,
1758 struct intel_iommu *iommu)
1759{
d160aca5 1760 int num, count = INT_MAX;
d160aca5 1761
55d94043 1762 assert_spin_locked(&device_domain_lock);
d160aca5 1763 assert_spin_locked(&iommu->lock);
fb170fb4 1764
29a27719
JR
1765 domain->iommu_refcnt[iommu->seq_id] -= 1;
1766 count = --domain->iommu_count;
1767 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1768 num = domain->iommu_did[iommu->seq_id];
1769 clear_bit(num, iommu->domain_ids);
1770 set_iommu_domain(iommu, num, NULL);
fb170fb4 1771
fb170fb4 1772 domain_update_iommu_cap(domain);
c0e8a6c8 1773 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1774 }
fb170fb4
JL
1775
1776 return count;
1777}
1778
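/*
 * Editor's sketch, not part of intel-iommu.c: domain_attach_iommu() and
 * domain_detach_iommu() must be called with both device_domain_lock and
 * iommu->lock held (see the assert_spin_locked() calls above).  A
 * hypothetical caller would look roughly like this; the function name is
 * invented for illustration only:
 */
static int __maybe_unused example_attach_domain(struct dmar_domain *domain,
						struct intel_iommu *iommu)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);
	/* the first reference allocates a domain id from iommu->domain_ids */
	ret = domain_attach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}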
ba395927 1779static struct iova_domain reserved_iova_list;
8a443df4 1780static struct lock_class_key reserved_rbtree_key;
ba395927 1781
51a63e67 1782static int dmar_init_reserved_ranges(void)
ba395927
KA
1783{
1784 struct pci_dev *pdev = NULL;
1785 struct iova *iova;
1786 int i;
ba395927 1787
0fb5fe87
RM
1788 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1789 DMA_32BIT_PFN);
ba395927 1790
8a443df4
MG
1791 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1792 &reserved_rbtree_key);
1793
ba395927
KA
1794 /* IOAPIC ranges shouldn't be accessed by DMA */
1795 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1796 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1797 if (!iova) {
9f10e5bf 1798 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1799 return -ENODEV;
1800 }
ba395927
KA
1801
1802 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1803 for_each_pci_dev(pdev) {
1804 struct resource *r;
1805
1806 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1807 r = &pdev->resource[i];
1808 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1809 continue;
1a4a4551
DW
1810 iova = reserve_iova(&reserved_iova_list,
1811 IOVA_PFN(r->start),
1812 IOVA_PFN(r->end));
51a63e67 1813 if (!iova) {
9f10e5bf 1814 pr_err("Reserve iova failed\n");
51a63e67
JC
1815 return -ENODEV;
1816 }
ba395927
KA
1817 }
1818 }
51a63e67 1819 return 0;
ba395927
KA
1820}
1821
1822static void domain_reserve_special_ranges(struct dmar_domain *domain)
1823{
1824 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1825}
1826
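/*
 * Editor's note, not part of the original file: the ranges reserved above
 * are never handed out by the IOVA allocator.  For example the IOAPIC
 * window 0xfee00000-0xfeefffff (PFNs 0xfee00-0xfeeff) is reserved once in
 * reserved_iova_list and then copied into every new domain by
 * domain_reserve_special_ranges(), so a DMA mapping can never alias the
 * interrupt-message address range or a peer device's MMIO BAR.
 */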
1827static inline int guestwidth_to_adjustwidth(int gaw)
1828{
1829 int agaw;
1830 int r = (gaw - 12) % 9;
1831
1832 if (r == 0)
1833 agaw = gaw;
1834 else
1835 agaw = gaw + 9 - r;
1836 if (agaw > 64)
1837 agaw = 64;
1838 return agaw;
1839}
1840
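/*
 * Editor's sketch, not part of intel-iommu.c: the helper above rounds a
 * guest address width up to 12 + 9*n bits (a whole number of 9-bit
 * page-table levels above the 4KiB page offset), capped at 64.  A tiny
 * self-check, assuming only the function above (the function name is
 * made up for illustration):
 */
static void __maybe_unused adjustwidth_example(void)
{
	BUG_ON(guestwidth_to_adjustwidth(36) != 39);	/* rounded up one level */
	BUG_ON(guestwidth_to_adjustwidth(39) != 39);	/* already 12 + 9*3 */
	BUG_ON(guestwidth_to_adjustwidth(48) != 48);	/* already 12 + 9*4 */
	BUG_ON(guestwidth_to_adjustwidth(66) != 64);	/* capped at 64 bits */
}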
dc534b25
JR
1841static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1842 int guest_width)
ba395927 1843{
ba395927
KA
1844 int adjust_width, agaw;
1845 unsigned long sagaw;
1846
0fb5fe87
RM
1847 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1848 DMA_32BIT_PFN);
ba395927
KA
1849 domain_reserve_special_ranges(domain);
1850
1851 /* calculate AGAW */
ba395927
KA
1852 if (guest_width > cap_mgaw(iommu->cap))
1853 guest_width = cap_mgaw(iommu->cap);
1854 domain->gaw = guest_width;
1855 adjust_width = guestwidth_to_adjustwidth(guest_width);
1856 agaw = width_to_agaw(adjust_width);
1857 sagaw = cap_sagaw(iommu->cap);
1858 if (!test_bit(agaw, &sagaw)) {
1859 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1860 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1861 agaw = find_next_bit(&sagaw, 5, agaw);
1862 if (agaw >= 5)
1863 return -ENODEV;
1864 }
1865 domain->agaw = agaw;
ba395927 1866
8e604097
WH
1867 if (ecap_coherent(iommu->ecap))
1868 domain->iommu_coherency = 1;
1869 else
1870 domain->iommu_coherency = 0;
1871
58c610bd
SY
1872 if (ecap_sc_support(iommu->ecap))
1873 domain->iommu_snooping = 1;
1874 else
1875 domain->iommu_snooping = 0;
1876
214e39aa
DW
1877 if (intel_iommu_superpage)
1878 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1879 else
1880 domain->iommu_superpage = 0;
1881
4c923d47 1882 domain->nid = iommu->node;
c7151a8d 1883
ba395927 1884 /* always allocate the top pgd */
4c923d47 1885 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1886 if (!domain->pgd)
1887 return -ENOMEM;
5b6985ce 1888 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1889 return 0;
1890}
1891
1892static void domain_exit(struct dmar_domain *domain)
1893{
ea8ea460 1894 struct page *freelist = NULL;
ba395927
KA
1895
1896 /* Domain 0 is reserved, so dont process it */
1897 if (!domain)
1898 return;
1899
7b668357
AW
1900 /* Flush any lazy unmaps that may reference this domain */
1901 if (!intel_iommu_strict)
1902 flush_unmaps_timeout(0);
1903
d160aca5
JR
1904 /* Remove associated devices and clear attached or cached domains */
1905 rcu_read_lock();
ba395927 1906 domain_remove_dev_info(domain);
d160aca5 1907 rcu_read_unlock();
92d03cc8 1908
ba395927
KA
1909 /* destroy iovas */
1910 put_iova_domain(&domain->iovad);
ba395927 1911
ea8ea460 1912 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1913
ea8ea460
DW
1914 dma_free_pagelist(freelist);
1915
ba395927
KA
1916 free_domain_mem(domain);
1917}
1918
64ae892b
DW
1919static int domain_context_mapping_one(struct dmar_domain *domain,
1920 struct intel_iommu *iommu,
28ccce0d 1921 u8 bus, u8 devfn)
ba395927 1922{
c6c2cebd 1923 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1924 int translation = CONTEXT_TT_MULTI_LEVEL;
1925 struct device_domain_info *info = NULL;
ba395927 1926 struct context_entry *context;
ba395927 1927 unsigned long flags;
ea6606b0 1928 struct dma_pte *pgd;
55d94043 1929 int ret, agaw;
28ccce0d 1930
c6c2cebd
JR
1931 WARN_ON(did == 0);
1932
28ccce0d
JR
1933 if (hw_pass_through && domain_type_is_si(domain))
1934 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1935
1936 pr_debug("Set context mapping for %02x:%02x.%d\n",
1937 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1938
ba395927 1939 BUG_ON(!domain->pgd);
5331fe6f 1940
55d94043
JR
1941 spin_lock_irqsave(&device_domain_lock, flags);
1942 spin_lock(&iommu->lock);
1943
1944 ret = -ENOMEM;
03ecc32c 1945 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1946 if (!context)
55d94043 1947 goto out_unlock;
ba395927 1948
55d94043
JR
1949 ret = 0;
1950 if (context_present(context))
1951 goto out_unlock;
cf484d0e 1952
ea6606b0
WH
1953 pgd = domain->pgd;
1954
de24e553 1955 context_clear_entry(context);
c6c2cebd 1956 context_set_domain_id(context, did);
ea6606b0 1957
de24e553
JR
1958 /*
 1959	 * Skip top levels of page tables for an iommu whose agaw is smaller
 1960	 * than the default. Unnecessary for PT mode.
1961 */
93a23a72 1962 if (translation != CONTEXT_TT_PASS_THROUGH) {
de24e553 1963 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
55d94043 1964 ret = -ENOMEM;
de24e553 1965 pgd = phys_to_virt(dma_pte_addr(pgd));
55d94043
JR
1966 if (!dma_pte_present(pgd))
1967 goto out_unlock;
ea6606b0 1968 }
4ed0d3e6 1969
64ae892b 1970 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
b16d0cb9
DW
1971 if (info && info->ats_supported)
1972 translation = CONTEXT_TT_DEV_IOTLB;
1973 else
1974 translation = CONTEXT_TT_MULTI_LEVEL;
de24e553 1975
93a23a72
YZ
1976 context_set_address_root(context, virt_to_phys(pgd));
1977 context_set_address_width(context, iommu->agaw);
de24e553
JR
1978 } else {
1979 /*
1980 * In pass through mode, AW must be programmed to
1981 * indicate the largest AGAW value supported by
1982 * hardware. And ASR is ignored by hardware.
1983 */
1984 context_set_address_width(context, iommu->msagaw);
93a23a72 1985 }
4ed0d3e6
FY
1986
1987 context_set_translation_type(context, translation);
c07e7d21
MM
1988 context_set_fault_enable(context);
1989 context_set_present(context);
5331fe6f 1990 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1991
4c25a2c1
DW
1992 /*
1993 * It's a non-present to present mapping. If hardware doesn't cache
 1994	 * non-present entries we only need to flush the write-buffer. If it
1995 * _does_ cache non-present entries, then it does so in the special
1996 * domain #0, which we have to flush:
1997 */
1998 if (cap_caching_mode(iommu->cap)) {
1999 iommu->flush.flush_context(iommu, 0,
2000 (((u16)bus) << 8) | devfn,
2001 DMA_CCMD_MASK_NOBIT,
2002 DMA_CCMD_DEVICE_INVL);
c6c2cebd 2003 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 2004 } else {
ba395927 2005 iommu_flush_write_buffer(iommu);
4c25a2c1 2006 }
93a23a72 2007 iommu_enable_dev_iotlb(info);
c7151a8d 2008
55d94043
JR
2009 ret = 0;
2010
2011out_unlock:
2012 spin_unlock(&iommu->lock);
2013 spin_unlock_irqrestore(&device_domain_lock, flags);
fb170fb4 2014
ba395927
KA
 2015	return ret;
2016}
2017
579305f7
AW
2018struct domain_context_mapping_data {
2019 struct dmar_domain *domain;
2020 struct intel_iommu *iommu;
579305f7
AW
2021};
2022
2023static int domain_context_mapping_cb(struct pci_dev *pdev,
2024 u16 alias, void *opaque)
2025{
2026 struct domain_context_mapping_data *data = opaque;
2027
2028 return domain_context_mapping_one(data->domain, data->iommu,
28ccce0d 2029 PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
2030}
2031
ba395927 2032static int
28ccce0d 2033domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 2034{
64ae892b 2035 struct intel_iommu *iommu;
156baca8 2036 u8 bus, devfn;
579305f7 2037 struct domain_context_mapping_data data;
64ae892b 2038
e1f167f3 2039 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
2040 if (!iommu)
2041 return -ENODEV;
ba395927 2042
579305f7 2043 if (!dev_is_pci(dev))
28ccce0d 2044 return domain_context_mapping_one(domain, iommu, bus, devfn);
579305f7
AW
2045
2046 data.domain = domain;
2047 data.iommu = iommu;
579305f7
AW
2048
2049 return pci_for_each_dma_alias(to_pci_dev(dev),
2050 &domain_context_mapping_cb, &data);
2051}
2052
2053static int domain_context_mapped_cb(struct pci_dev *pdev,
2054 u16 alias, void *opaque)
2055{
2056 struct intel_iommu *iommu = opaque;
2057
2058 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2059}
2060
e1f167f3 2061static int domain_context_mapped(struct device *dev)
ba395927 2062{
5331fe6f 2063 struct intel_iommu *iommu;
156baca8 2064 u8 bus, devfn;
5331fe6f 2065
e1f167f3 2066 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2067 if (!iommu)
2068 return -ENODEV;
ba395927 2069
579305f7
AW
2070 if (!dev_is_pci(dev))
2071 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2072
579305f7
AW
2073 return !pci_for_each_dma_alias(to_pci_dev(dev),
2074 domain_context_mapped_cb, iommu);
ba395927
KA
2075}
2076
f532959b
FY
2077/* Returns a number of VTD pages, but aligned to MM page size */
2078static inline unsigned long aligned_nrpages(unsigned long host_addr,
2079 size_t size)
2080{
2081 host_addr &= ~PAGE_MASK;
2082 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2083}
2084
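/*
 * Editor's example, not part of the original file: with 4KiB MM pages
 * (PAGE_SHIFT == VTD_PAGE_SHIFT == 12), a 6000-byte buffer starting
 * 0x200 bytes into a page straddles two pages, so
 * aligned_nrpages(0x200, 6000) == 2.  On a hypothetical kernel with
 * 64KiB MM pages the same call would return 16, because the result is
 * rounded up to a whole MM page and then expressed in 4KiB VT-d pages.
 */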
6dd9a7c7
YS
2085/* Return largest possible superpage level for a given mapping */
2086static inline int hardware_largepage_caps(struct dmar_domain *domain,
2087 unsigned long iov_pfn,
2088 unsigned long phy_pfn,
2089 unsigned long pages)
2090{
2091 int support, level = 1;
2092 unsigned long pfnmerge;
2093
2094 support = domain->iommu_superpage;
2095
2096 /* To use a large page, the virtual *and* physical addresses
2097 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2098 of them will mean we have to use smaller pages. So just
2099 merge them and check both at once. */
2100 pfnmerge = iov_pfn | phy_pfn;
2101
2102 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2103 pages >>= VTD_STRIDE_SHIFT;
2104 if (!pages)
2105 break;
2106 pfnmerge >>= VTD_STRIDE_SHIFT;
2107 level++;
2108 support--;
2109 }
2110 return level;
2111}
2112
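/*
 * Editor's example, not from the original source: with a 9-bit
 * VTD_STRIDE_SHIFT, merging iov_pfn = 0x200 and phy_pfn = 0x400 gives
 * 0x600, whose low 9 bits are clear, so both addresses are 2MiB aligned
 * and a run of at least 512 pages can be mapped at level 2 (one 2MiB
 * superpage).  If either PFN had any of its low 9 bits set, the merged
 * value would too, and only level 1 (4KiB) PTEs could be used.
 */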
9051aa02
DW
2113static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2114 struct scatterlist *sg, unsigned long phys_pfn,
2115 unsigned long nr_pages, int prot)
e1605495
DW
2116{
2117 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2118 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2119 unsigned long sg_res = 0;
6dd9a7c7
YS
2120 unsigned int largepage_lvl = 0;
2121 unsigned long lvl_pages = 0;
e1605495 2122
162d1b10 2123 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2124
2125 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2126 return -EINVAL;
2127
2128 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2129
cc4f14aa
JL
2130 if (!sg) {
2131 sg_res = nr_pages;
9051aa02
DW
2132 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2133 }
2134
6dd9a7c7 2135 while (nr_pages > 0) {
c85994e4
DW
2136 uint64_t tmp;
2137
e1605495 2138 if (!sg_res) {
f532959b 2139 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2140 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2141 sg->dma_length = sg->length;
db0fa0cb 2142 pteval = (sg_phys(sg) & PAGE_MASK) | prot;
6dd9a7c7 2143 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2144 }
6dd9a7c7 2145
e1605495 2146 if (!pte) {
6dd9a7c7
YS
2147 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2148
5cf0a76f 2149 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2150 if (!pte)
2151 return -ENOMEM;
6dd9a7c7 2152 /* It is large page*/
6491d4d0 2153 if (largepage_lvl > 1) {
ba2374fd
CZ
2154 unsigned long nr_superpages, end_pfn;
2155
6dd9a7c7 2156 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb 2157 lvl_pages = lvl_to_nr_pages(largepage_lvl);
ba2374fd
CZ
2158
2159 nr_superpages = sg_res / lvl_pages;
2160 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2161
d41a4adb
JL
2162 /*
2163 * Ensure that old small page tables are
ba2374fd 2164 * removed to make room for superpage(s).
d41a4adb 2165 */
ba2374fd 2166 dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
6491d4d0 2167 } else {
6dd9a7c7 2168 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2169 }
6dd9a7c7 2170
e1605495
DW
2171 }
2172 /* We don't need lock here, nobody else
2173 * touches the iova range
2174 */
7766a3fb 2175 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2176 if (tmp) {
1bf20f0d 2177 static int dumps = 5;
9f10e5bf
JR
2178 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2179 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2180 if (dumps) {
2181 dumps--;
2182 debug_dma_dump_mappings(NULL);
2183 }
2184 WARN_ON(1);
2185 }
6dd9a7c7
YS
2186
2187 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2188
2189 BUG_ON(nr_pages < lvl_pages);
2190 BUG_ON(sg_res < lvl_pages);
2191
2192 nr_pages -= lvl_pages;
2193 iov_pfn += lvl_pages;
2194 phys_pfn += lvl_pages;
2195 pteval += lvl_pages * VTD_PAGE_SIZE;
2196 sg_res -= lvl_pages;
2197
2198 /* If the next PTE would be the first in a new page, then we
2199 need to flush the cache on the entries we've just written.
2200 And then we'll need to recalculate 'pte', so clear it and
2201 let it get set again in the if (!pte) block above.
2202
2203 If we're done (!nr_pages) we need to flush the cache too.
2204
2205 Also if we've been setting superpages, we may need to
2206 recalculate 'pte' and switch back to smaller pages for the
2207 end of the mapping, if the trailing size is not enough to
2208 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2209 pte++;
6dd9a7c7
YS
2210 if (!nr_pages || first_pte_in_page(pte) ||
2211 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2212 domain_flush_cache(domain, first_pte,
2213 (void *)pte - (void *)first_pte);
2214 pte = NULL;
2215 }
6dd9a7c7
YS
2216
2217 if (!sg_res && nr_pages)
e1605495
DW
2218 sg = sg_next(sg);
2219 }
2220 return 0;
2221}
2222
9051aa02
DW
2223static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2224 struct scatterlist *sg, unsigned long nr_pages,
2225 int prot)
ba395927 2226{
9051aa02
DW
2227 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2228}
6f6a00e4 2229
9051aa02
DW
2230static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2231 unsigned long phys_pfn, unsigned long nr_pages,
2232 int prot)
2233{
2234 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2235}
2236
2452d9db 2237static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2238{
c7151a8d
WH
2239 if (!iommu)
2240 return;
8c11e798
WH
2241
2242 clear_context_table(iommu, bus, devfn);
2243 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2244 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2245 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2246}
2247
109b9b04
DW
2248static inline void unlink_domain_info(struct device_domain_info *info)
2249{
2250 assert_spin_locked(&device_domain_lock);
2251 list_del(&info->link);
2252 list_del(&info->global);
2253 if (info->dev)
0bcb3e28 2254 info->dev->archdata.iommu = NULL;
109b9b04
DW
2255}
2256
ba395927
KA
2257static void domain_remove_dev_info(struct dmar_domain *domain)
2258{
3a74ca01 2259 struct device_domain_info *info, *tmp;
fb170fb4 2260 unsigned long flags;
ba395927
KA
2261
2262 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2263 list_for_each_entry_safe(info, tmp, &domain->devices, link)
127c7615 2264 __dmar_remove_one_dev_info(info);
ba395927
KA
2265 spin_unlock_irqrestore(&device_domain_lock, flags);
2266}
2267
2268/*
2269 * find_domain
1525a29a 2270 * Note: we use struct device->archdata.iommu to store the info
ba395927 2271 */
1525a29a 2272static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2273{
2274 struct device_domain_info *info;
2275
2276 /* No lock here, assumes no domain exit in normal case */
1525a29a 2277 info = dev->archdata.iommu;
ba395927
KA
2278 if (info)
2279 return info->domain;
2280 return NULL;
2281}
2282
5a8f40e8 2283static inline struct device_domain_info *
745f2586
JL
2284dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2285{
2286 struct device_domain_info *info;
2287
2288 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2289 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2290 info->devfn == devfn)
5a8f40e8 2291 return info;
745f2586
JL
2292
2293 return NULL;
2294}
2295
5db31569
JR
2296static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2297 int bus, int devfn,
2298 struct device *dev,
2299 struct dmar_domain *domain)
745f2586 2300{
5a8f40e8 2301 struct dmar_domain *found = NULL;
745f2586
JL
2302 struct device_domain_info *info;
2303 unsigned long flags;
d160aca5 2304 int ret;
745f2586
JL
2305
2306 info = alloc_devinfo_mem();
2307 if (!info)
b718cd3d 2308 return NULL;
745f2586 2309
745f2586
JL
2310 info->bus = bus;
2311 info->devfn = devfn;
b16d0cb9
DW
2312 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2313 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2314 info->ats_qdep = 0;
745f2586
JL
2315 info->dev = dev;
2316 info->domain = domain;
5a8f40e8 2317 info->iommu = iommu;
745f2586 2318
b16d0cb9
DW
2319 if (dev && dev_is_pci(dev)) {
2320 struct pci_dev *pdev = to_pci_dev(info->dev);
2321
2322 if (ecap_dev_iotlb_support(iommu->ecap) &&
2323 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2324 dmar_find_matched_atsr_unit(pdev))
2325 info->ats_supported = 1;
2326
2327 if (ecs_enabled(iommu)) {
2328 if (pasid_enabled(iommu)) {
2329 int features = pci_pasid_features(pdev);
2330 if (features >= 0)
2331 info->pasid_supported = features | 1;
2332 }
2333
2334 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2335 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2336 info->pri_supported = 1;
2337 }
2338 }
2339
745f2586
JL
2340 spin_lock_irqsave(&device_domain_lock, flags);
2341 if (dev)
0bcb3e28 2342 found = find_domain(dev);
f303e507
JR
2343
2344 if (!found) {
5a8f40e8 2345 struct device_domain_info *info2;
41e80dca 2346 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
f303e507
JR
2347 if (info2) {
2348 found = info2->domain;
2349 info2->dev = dev;
2350 }
5a8f40e8 2351 }
f303e507 2352
745f2586
JL
2353 if (found) {
2354 spin_unlock_irqrestore(&device_domain_lock, flags);
2355 free_devinfo_mem(info);
b718cd3d
DW
2356 /* Caller must free the original domain */
2357 return found;
745f2586
JL
2358 }
2359
d160aca5
JR
2360 spin_lock(&iommu->lock);
2361 ret = domain_attach_iommu(domain, iommu);
2362 spin_unlock(&iommu->lock);
2363
2364 if (ret) {
c6c2cebd
JR
2365 spin_unlock_irqrestore(&device_domain_lock, flags);
2366 return NULL;
2367 }
c6c2cebd 2368
b718cd3d
DW
2369 list_add(&info->link, &domain->devices);
2370 list_add(&info->global, &device_domain_list);
2371 if (dev)
2372 dev->archdata.iommu = info;
2373 spin_unlock_irqrestore(&device_domain_lock, flags);
2374
cc4e2575
JR
2375 if (dev && domain_context_mapping(domain, dev)) {
2376 pr_err("Domain context map for %s failed\n", dev_name(dev));
e6de0f8d 2377 dmar_remove_one_dev_info(domain, dev);
cc4e2575
JR
2378 return NULL;
2379 }
2380
b718cd3d 2381 return domain;
745f2586
JL
2382}
2383
579305f7
AW
2384static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2385{
2386 *(u16 *)opaque = alias;
2387 return 0;
2388}
2389
ba395927 2390/* domain is initialized */
146922ec 2391static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2392{
cc4e2575 2393 struct device_domain_info *info = NULL;
579305f7
AW
2394 struct dmar_domain *domain, *tmp;
2395 struct intel_iommu *iommu;
08a7f456 2396 u16 req_id, dma_alias;
ba395927 2397 unsigned long flags;
aa4d066a 2398 u8 bus, devfn;
ba395927 2399
146922ec 2400 domain = find_domain(dev);
ba395927
KA
2401 if (domain)
2402 return domain;
2403
579305f7
AW
2404 iommu = device_to_iommu(dev, &bus, &devfn);
2405 if (!iommu)
2406 return NULL;
2407
08a7f456
JR
2408 req_id = ((u16)bus << 8) | devfn;
2409
146922ec
DW
2410 if (dev_is_pci(dev)) {
2411 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2412
579305f7
AW
2413 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2414
2415 spin_lock_irqsave(&device_domain_lock, flags);
2416 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2417 PCI_BUS_NUM(dma_alias),
2418 dma_alias & 0xff);
2419 if (info) {
2420 iommu = info->iommu;
2421 domain = info->domain;
5a8f40e8 2422 }
579305f7 2423 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2424
579305f7
AW
 2425		/* DMA alias already has a domain, use it */
2426 if (info)
2427 goto found_domain;
2428 }
ba395927 2429
146922ec 2430 /* Allocate and initialize new domain for the device */
ab8dfe25 2431 domain = alloc_domain(0);
745f2586 2432 if (!domain)
579305f7 2433 return NULL;
dc534b25 2434 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2435 domain_exit(domain);
2436 return NULL;
2c2e2c38 2437 }
ba395927 2438
579305f7 2439 /* register PCI DMA alias device */
08a7f456 2440 if (req_id != dma_alias && dev_is_pci(dev)) {
5db31569
JR
2441 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2442 dma_alias & 0xff, NULL, domain);
579305f7
AW
2443
2444 if (!tmp || tmp != domain) {
2445 domain_exit(domain);
2446 domain = tmp;
2447 }
2448
b718cd3d 2449 if (!domain)
579305f7 2450 return NULL;
ba395927
KA
2451 }
2452
2453found_domain:
5db31569 2454 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
579305f7
AW
2455
2456 if (!tmp || tmp != domain) {
2457 domain_exit(domain);
2458 domain = tmp;
2459 }
b718cd3d
DW
2460
2461 return domain;
ba395927
KA
2462}
2463
b213203e
DW
2464static int iommu_domain_identity_map(struct dmar_domain *domain,
2465 unsigned long long start,
2466 unsigned long long end)
ba395927 2467{
c5395d5c
DW
2468 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2469 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2470
2471 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2472 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2473 pr_err("Reserving iova failed\n");
b213203e 2474 return -ENOMEM;
ba395927
KA
2475 }
2476
af1089ce 2477 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2478 /*
2479 * RMRR range might have overlap with physical memory range,
2480 * clear it first
2481 */
c5395d5c 2482 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2483
c5395d5c
DW
2484 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2485 last_vpfn - first_vpfn + 1,
61df7443 2486 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2487}
2488
0b9d9753 2489static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2490 unsigned long long start,
2491 unsigned long long end)
2492{
2493 struct dmar_domain *domain;
2494 int ret;
2495
0b9d9753 2496 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2497 if (!domain)
2498 return -ENOMEM;
2499
19943b0e
DW
2500 /* For _hardware_ passthrough, don't bother. But for software
2501 passthrough, we do it anyway -- it may indicate a memory
 2502	   range which is reserved in E820 and so didn't get set
2503 up to start with in si_domain */
2504 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2505 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2506 dev_name(dev), start, end);
19943b0e
DW
2507 return 0;
2508 }
2509
9f10e5bf
JR
2510 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2511 dev_name(dev), start, end);
2512
5595b528
DW
2513 if (end < start) {
2514 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2515 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2516 dmi_get_system_info(DMI_BIOS_VENDOR),
2517 dmi_get_system_info(DMI_BIOS_VERSION),
2518 dmi_get_system_info(DMI_PRODUCT_VERSION));
2519 ret = -EIO;
2520 goto error;
2521 }
2522
2ff729f5
DW
2523 if (end >> agaw_to_width(domain->agaw)) {
2524 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2525 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2526 agaw_to_width(domain->agaw),
2527 dmi_get_system_info(DMI_BIOS_VENDOR),
2528 dmi_get_system_info(DMI_BIOS_VERSION),
2529 dmi_get_system_info(DMI_PRODUCT_VERSION));
2530 ret = -EIO;
2531 goto error;
2532 }
19943b0e 2533
b213203e 2534 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2535 if (ret)
2536 goto error;
2537
b213203e
DW
2538 return 0;
2539
2540 error:
ba395927
KA
2541 domain_exit(domain);
2542 return ret;
ba395927
KA
2543}
2544
2545static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2546 struct device *dev)
ba395927 2547{
0b9d9753 2548 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2549 return 0;
0b9d9753
DW
2550 return iommu_prepare_identity_map(dev, rmrr->base_address,
2551 rmrr->end_address);
ba395927
KA
2552}
2553
d3f13810 2554#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2555static inline void iommu_prepare_isa(void)
2556{
2557 struct pci_dev *pdev;
2558 int ret;
2559
2560 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2561 if (!pdev)
2562 return;
2563
9f10e5bf 2564 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2565 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2566
2567 if (ret)
9f10e5bf 2568 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2569
9b27e82d 2570 pci_dev_put(pdev);
49a0429e
KA
2571}
2572#else
2573static inline void iommu_prepare_isa(void)
2574{
2575 return;
2576}
d3f13810 2577#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2578
2c2e2c38 2579static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2580
071e1374 2581static int __init si_domain_init(int hw)
2c2e2c38 2582{
c7ab48d2 2583 int nid, ret = 0;
2c2e2c38 2584
ab8dfe25 2585 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2586 if (!si_domain)
2587 return -EFAULT;
2588
2c2e2c38
FY
2589 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2590 domain_exit(si_domain);
2591 return -EFAULT;
2592 }
2593
0dc79715 2594 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2595
19943b0e
DW
2596 if (hw)
2597 return 0;
2598
c7ab48d2 2599 for_each_online_node(nid) {
5dfe8660
TH
2600 unsigned long start_pfn, end_pfn;
2601 int i;
2602
2603 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2604 ret = iommu_domain_identity_map(si_domain,
2605 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2606 if (ret)
2607 return ret;
2608 }
c7ab48d2
DW
2609 }
2610
2c2e2c38
FY
2611 return 0;
2612}
2613
9b226624 2614static int identity_mapping(struct device *dev)
2c2e2c38
FY
2615{
2616 struct device_domain_info *info;
2617
2618 if (likely(!iommu_identity_mapping))
2619 return 0;
2620
9b226624 2621 info = dev->archdata.iommu;
cb452a40
MT
2622 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2623 return (info->domain == si_domain);
2c2e2c38 2624
2c2e2c38
FY
2625 return 0;
2626}
2627
28ccce0d 2628static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2629{
0ac72664 2630 struct dmar_domain *ndomain;
5a8f40e8 2631 struct intel_iommu *iommu;
156baca8 2632 u8 bus, devfn;
2c2e2c38 2633
5913c9bf 2634 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2635 if (!iommu)
2636 return -ENODEV;
2637
5db31569 2638 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2639 if (ndomain != domain)
2640 return -EBUSY;
2c2e2c38
FY
2641
2642 return 0;
2643}
2644
0b9d9753 2645static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2646{
2647 struct dmar_rmrr_unit *rmrr;
832bd858 2648 struct device *tmp;
ea2447f7
TM
2649 int i;
2650
0e242612 2651 rcu_read_lock();
ea2447f7 2652 for_each_rmrr_units(rmrr) {
b683b230
JL
2653 /*
2654 * Return TRUE if this RMRR contains the device that
2655 * is passed in.
2656 */
2657 for_each_active_dev_scope(rmrr->devices,
2658 rmrr->devices_cnt, i, tmp)
0b9d9753 2659 if (tmp == dev) {
0e242612 2660 rcu_read_unlock();
ea2447f7 2661 return true;
b683b230 2662 }
ea2447f7 2663 }
0e242612 2664 rcu_read_unlock();
ea2447f7
TM
2665 return false;
2666}
2667
c875d2c1
AW
2668/*
2669 * There are a couple cases where we need to restrict the functionality of
2670 * devices associated with RMRRs. The first is when evaluating a device for
2671 * identity mapping because problems exist when devices are moved in and out
2672 * of domains and their respective RMRR information is lost. This means that
2673 * a device with associated RMRRs will never be in a "passthrough" domain.
2674 * The second is use of the device through the IOMMU API. This interface
2675 * expects to have full control of the IOVA space for the device. We cannot
2676 * satisfy both the requirement that RMRR access is maintained and have an
2677 * unencumbered IOVA space. We also have no ability to quiesce the device's
2678 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2679 * We therefore prevent devices associated with an RMRR from participating in
2680 * the IOMMU API, which eliminates them from device assignment.
2681 *
2682 * In both cases we assume that PCI USB devices with RMRRs have them largely
2683 * for historical reasons and that the RMRR space is not actively used post
2684 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2685 *
2686 * The same exception is made for graphics devices, with the requirement that
2687 * any use of the RMRR regions will be torn down before assigning the device
2688 * to a guest.
c875d2c1
AW
2689 */
2690static bool device_is_rmrr_locked(struct device *dev)
2691{
2692 if (!device_has_rmrr(dev))
2693 return false;
2694
2695 if (dev_is_pci(dev)) {
2696 struct pci_dev *pdev = to_pci_dev(dev);
2697
18436afd 2698 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2699 return false;
2700 }
2701
2702 return true;
2703}
2704
3bdb2591 2705static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2706{
ea2447f7 2707
3bdb2591
DW
2708 if (dev_is_pci(dev)) {
2709 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2710
c875d2c1 2711 if (device_is_rmrr_locked(dev))
3bdb2591 2712 return 0;
e0fc7e0b 2713
3bdb2591
DW
2714 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2715 return 1;
e0fc7e0b 2716
3bdb2591
DW
2717 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2718 return 1;
6941af28 2719
3bdb2591 2720 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2721 return 0;
3bdb2591
DW
2722
2723 /*
2724 * We want to start off with all devices in the 1:1 domain, and
2725 * take them out later if we find they can't access all of memory.
2726 *
2727 * However, we can't do this for PCI devices behind bridges,
2728 * because all PCI devices behind the same bridge will end up
2729 * with the same source-id on their transactions.
2730 *
2731 * Practically speaking, we can't change things around for these
2732 * devices at run-time, because we can't be sure there'll be no
2733 * DMA transactions in flight for any of their siblings.
2734 *
2735 * So PCI devices (unless they're on the root bus) as well as
2736 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2737 * the 1:1 domain, just in _case_ one of their siblings turns out
2738 * not to be able to map all of memory.
2739 */
2740 if (!pci_is_pcie(pdev)) {
2741 if (!pci_is_root_bus(pdev->bus))
2742 return 0;
2743 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2744 return 0;
2745 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2746 return 0;
3bdb2591
DW
2747 } else {
2748 if (device_has_rmrr(dev))
2749 return 0;
2750 }
3dfc813d 2751
3bdb2591 2752 /*
3dfc813d 2753 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2754 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2755 * take them out of the 1:1 domain later.
2756 */
8fcc5372
CW
2757 if (!startup) {
2758 /*
2759 * If the device's dma_mask is less than the system's memory
2760 * size then this is not a candidate for identity mapping.
2761 */
3bdb2591 2762 u64 dma_mask = *dev->dma_mask;
8fcc5372 2763
3bdb2591
DW
2764 if (dev->coherent_dma_mask &&
2765 dev->coherent_dma_mask < dma_mask)
2766 dma_mask = dev->coherent_dma_mask;
8fcc5372 2767
3bdb2591 2768 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2769 }
6941af28
DW
2770
2771 return 1;
2772}
2773
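/*
 * Editor's example, not part of the original file: on a machine with
 * 8GiB of RAM dma_get_required_mask() reports a 33-bit mask, so a device
 * limited to DMA_BIT_MASK(32) fails the check above once !startup and is
 * kept out of (or later dropped from) the identity-mapped si_domain,
 * while a device with a 64-bit dma_mask stays 1:1 mapped.
 */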
cf04eee8
DW
2774static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2775{
2776 int ret;
2777
2778 if (!iommu_should_identity_map(dev, 1))
2779 return 0;
2780
28ccce0d 2781 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 2782 if (!ret)
9f10e5bf
JR
2783 pr_info("%s identity mapping for device %s\n",
2784 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2785 else if (ret == -ENODEV)
2786 /* device not associated with an iommu */
2787 ret = 0;
2788
2789 return ret;
2790}
2791
2792
071e1374 2793static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2794{
2c2e2c38 2795 struct pci_dev *pdev = NULL;
cf04eee8
DW
2796 struct dmar_drhd_unit *drhd;
2797 struct intel_iommu *iommu;
2798 struct device *dev;
2799 int i;
2800 int ret = 0;
2c2e2c38 2801
2c2e2c38 2802 for_each_pci_dev(pdev) {
cf04eee8
DW
2803 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2804 if (ret)
2805 return ret;
2806 }
2807
2808 for_each_active_iommu(iommu, drhd)
2809 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2810 struct acpi_device_physical_node *pn;
2811 struct acpi_device *adev;
2812
2813 if (dev->bus != &acpi_bus_type)
2814 continue;
86080ccc 2815
cf04eee8
DW
2816 adev= to_acpi_device(dev);
2817 mutex_lock(&adev->physical_node_lock);
2818 list_for_each_entry(pn, &adev->physical_node_list, node) {
2819 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2820 if (ret)
2821 break;
eae460b6 2822 }
cf04eee8
DW
2823 mutex_unlock(&adev->physical_node_lock);
2824 if (ret)
2825 return ret;
62edf5dc 2826 }
2c2e2c38
FY
2827
2828 return 0;
2829}
2830
ffebeb46
JL
2831static void intel_iommu_init_qi(struct intel_iommu *iommu)
2832{
2833 /*
2834 * Start from the sane iommu hardware state.
2835 * If the queued invalidation is already initialized by us
2836 * (for example, while enabling interrupt-remapping) then
2837 * we got the things already rolling from a sane state.
2838 */
2839 if (!iommu->qi) {
2840 /*
2841 * Clear any previous faults.
2842 */
2843 dmar_fault(-1, iommu);
2844 /*
2845 * Disable queued invalidation if supported and already enabled
2846 * before OS handover.
2847 */
2848 dmar_disable_qi(iommu);
2849 }
2850
2851 if (dmar_enable_qi(iommu)) {
2852 /*
2853 * Queued Invalidate not enabled, use Register Based Invalidate
2854 */
2855 iommu->flush.flush_context = __iommu_flush_context;
2856 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 2857 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
2858 iommu->name);
2859 } else {
2860 iommu->flush.flush_context = qi_flush_context;
2861 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 2862 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
2863 }
2864}
2865
091d42e4 2866static int copy_context_table(struct intel_iommu *iommu,
543c8dcf 2867 struct root_entry __iomem *old_re,
091d42e4
JR
2868 struct context_entry **tbl,
2869 int bus, bool ext)
2870{
dbcd861f 2871 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
543c8dcf
JR
2872 struct context_entry __iomem *old_ce = NULL;
2873 struct context_entry *new_ce = NULL, ce;
2874 struct root_entry re;
091d42e4
JR
2875 phys_addr_t old_ce_phys;
2876
2877 tbl_idx = ext ? bus * 2 : bus;
543c8dcf 2878 memcpy_fromio(&re, old_re, sizeof(re));
091d42e4
JR
2879
2880 for (devfn = 0; devfn < 256; devfn++) {
2881 /* First calculate the correct index */
2882 idx = (ext ? devfn * 2 : devfn) % 256;
2883
2884 if (idx == 0) {
2885 /* First save what we may have and clean up */
2886 if (new_ce) {
2887 tbl[tbl_idx] = new_ce;
2888 __iommu_flush_cache(iommu, new_ce,
2889 VTD_PAGE_SIZE);
2890 pos = 1;
2891 }
2892
2893 if (old_ce)
2894 iounmap(old_ce);
2895
2896 ret = 0;
2897 if (devfn < 0x80)
543c8dcf 2898 old_ce_phys = root_entry_lctp(&re);
091d42e4 2899 else
543c8dcf 2900 old_ce_phys = root_entry_uctp(&re);
091d42e4
JR
2901
2902 if (!old_ce_phys) {
2903 if (ext && devfn == 0) {
2904 /* No LCTP, try UCTP */
2905 devfn = 0x7f;
2906 continue;
2907 } else {
2908 goto out;
2909 }
2910 }
2911
2912 ret = -ENOMEM;
2913 old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
2914 if (!old_ce)
2915 goto out;
2916
2917 new_ce = alloc_pgtable_page(iommu->node);
2918 if (!new_ce)
2919 goto out_unmap;
2920
2921 ret = 0;
2922 }
2923
2924 /* Now copy the context entry */
543c8dcf 2925 memcpy_fromio(&ce, old_ce + idx, sizeof(ce));
091d42e4 2926
cf484d0e 2927 if (!__context_present(&ce))
091d42e4
JR
2928 continue;
2929
dbcd861f
JR
2930 did = context_domain_id(&ce);
2931 if (did >= 0 && did < cap_ndoms(iommu->cap))
2932 set_bit(did, iommu->domain_ids);
2933
cf484d0e
JR
2934 /*
2935 * We need a marker for copied context entries. This
2936 * marker needs to work for the old format as well as
2937 * for extended context entries.
2938 *
2939 * Bit 67 of the context entry is used. In the old
2940 * format this bit is available to software, in the
2941 * extended format it is the PGE bit, but PGE is ignored
2942 * by HW if PASIDs are disabled (and thus still
2943 * available).
2944 *
2945 * So disable PASIDs first and then mark the entry
2946 * copied. This means that we don't copy PASID
2947 * translations from the old kernel, but this is fine as
2948 * faults there are not fatal.
2949 */
2950 context_clear_pasid_enable(&ce);
2951 context_set_copied(&ce);
2952
091d42e4
JR
2953 new_ce[idx] = ce;
2954 }
2955
2956 tbl[tbl_idx + pos] = new_ce;
2957
2958 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2959
2960out_unmap:
2961 iounmap(old_ce);
2962
2963out:
2964 return ret;
2965}
2966
2967static int copy_translation_tables(struct intel_iommu *iommu)
2968{
543c8dcf 2969 struct root_entry __iomem *old_rt;
091d42e4 2970 struct context_entry **ctxt_tbls;
091d42e4
JR
2971 phys_addr_t old_rt_phys;
2972 int ctxt_table_entries;
2973 unsigned long flags;
2974 u64 rtaddr_reg;
2975 int bus, ret;
c3361f2f 2976 bool new_ext, ext;
091d42e4
JR
2977
2978 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2979 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
2980 new_ext = !!ecap_ecs(iommu->ecap);
2981
2982 /*
2983 * The RTT bit can only be changed when translation is disabled,
2984 * but disabling translation means to open a window for data
2985 * corruption. So bail out and don't copy anything if we would
2986 * have to change the bit.
2987 */
2988 if (new_ext != ext)
2989 return -EINVAL;
091d42e4
JR
2990
2991 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2992 if (!old_rt_phys)
2993 return -EINVAL;
2994
2995 old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
2996 if (!old_rt)
2997 return -ENOMEM;
2998
2999 /* This is too big for the stack - allocate it from slab */
3000 ctxt_table_entries = ext ? 512 : 256;
3001 ret = -ENOMEM;
3002 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3003 if (!ctxt_tbls)
3004 goto out_unmap;
3005
3006 for (bus = 0; bus < 256; bus++) {
3007 ret = copy_context_table(iommu, &old_rt[bus],
3008 ctxt_tbls, bus, ext);
3009 if (ret) {
3010 pr_err("%s: Failed to copy context table for bus %d\n",
3011 iommu->name, bus);
3012 continue;
3013 }
3014 }
3015
3016 spin_lock_irqsave(&iommu->lock, flags);
3017
3018 /* Context tables are copied, now write them to the root_entry table */
3019 for (bus = 0; bus < 256; bus++) {
3020 int idx = ext ? bus * 2 : bus;
3021 u64 val;
3022
3023 if (ctxt_tbls[idx]) {
3024 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3025 iommu->root_entry[bus].lo = val;
3026 }
3027
3028 if (!ext || !ctxt_tbls[idx + 1])
3029 continue;
3030
3031 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3032 iommu->root_entry[bus].hi = val;
3033 }
3034
3035 spin_unlock_irqrestore(&iommu->lock, flags);
3036
3037 kfree(ctxt_tbls);
3038
3039 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3040
3041 ret = 0;
3042
3043out_unmap:
3044 iounmap(old_rt);
3045
3046 return ret;
3047}
3048
b779260b 3049static int __init init_dmars(void)
ba395927
KA
3050{
3051 struct dmar_drhd_unit *drhd;
3052 struct dmar_rmrr_unit *rmrr;
a87f4918 3053 bool copied_tables = false;
832bd858 3054 struct device *dev;
ba395927 3055 struct intel_iommu *iommu;
9d783ba0 3056 int i, ret;
2c2e2c38 3057
ba395927
KA
3058 /*
3059 * for each drhd
3060 * allocate root
3061 * initialize and program root entry to not present
3062 * endfor
3063 */
3064 for_each_drhd_unit(drhd) {
5e0d2a6f 3065 /*
3066 * lock not needed as this is only incremented in the single
3067 * threaded kernel __init code path all other access are read
3068 * only
3069 */
78d8e704 3070 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
3071 g_num_of_iommus++;
3072 continue;
3073 }
9f10e5bf 3074 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3075 }
3076
ffebeb46
JL
3077 /* Preallocate enough resources for IOMMU hot-addition */
3078 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3079 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3080
d9630fe9
WH
3081 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3082 GFP_KERNEL);
3083 if (!g_iommus) {
9f10e5bf 3084 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3085 ret = -ENOMEM;
3086 goto error;
3087 }
3088
80b20dd8 3089 deferred_flush = kzalloc(g_num_of_iommus *
3090 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3091 if (!deferred_flush) {
5e0d2a6f 3092 ret = -ENOMEM;
989d51fc 3093 goto free_g_iommus;
5e0d2a6f 3094 }
3095
7c919779 3096 for_each_active_iommu(iommu, drhd) {
d9630fe9 3097 g_iommus[iommu->seq_id] = iommu;
ba395927 3098
b63d80d1
JR
3099 intel_iommu_init_qi(iommu);
3100
e61d98d8
SS
3101 ret = iommu_init_domains(iommu);
3102 if (ret)
989d51fc 3103 goto free_iommu;
e61d98d8 3104
4158c2ec
JR
3105 init_translation_status(iommu);
3106
091d42e4
JR
3107 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3108 iommu_disable_translation(iommu);
3109 clear_translation_pre_enabled(iommu);
3110 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3111 iommu->name);
3112 }
4158c2ec 3113
ba395927
KA
3114 /*
3115 * TBD:
3116 * we could share the same root & context tables
25985edc 3117	 * among all IOMMUs. Need to split it later.
ba395927
KA
3118 */
3119 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3120 if (ret)
989d51fc 3121 goto free_iommu;
5f0a7f76 3122
091d42e4
JR
3123 if (translation_pre_enabled(iommu)) {
3124 pr_info("Translation already enabled - trying to copy translation structures\n");
3125
3126 ret = copy_translation_tables(iommu);
3127 if (ret) {
3128 /*
3129 * We found the IOMMU with translation
3130 * enabled - but failed to copy over the
3131 * old root-entry table. Try to proceed
3132 * by disabling translation now and
3133 * allocating a clean root-entry table.
3134 * This might cause DMAR faults, but
3135 * probably the dump will still succeed.
3136 */
3137 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3138 iommu->name);
3139 iommu_disable_translation(iommu);
3140 clear_translation_pre_enabled(iommu);
3141 } else {
3142 pr_info("Copied translation tables from previous kernel for %s\n",
3143 iommu->name);
a87f4918 3144 copied_tables = true;
091d42e4
JR
3145 }
3146 }
3147
5f0a7f76
JR
3148 iommu_flush_write_buffer(iommu);
3149 iommu_set_root_entry(iommu);
3150 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3151 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3152
4ed0d3e6 3153 if (!ecap_pass_through(iommu->ecap))
19943b0e 3154 hw_pass_through = 0;
8a94ade4
DW
3155#ifdef CONFIG_INTEL_IOMMU_SVM
3156 if (pasid_enabled(iommu))
3157 intel_svm_alloc_pasid_tables(iommu);
3158#endif
ba395927
KA
3159 }
3160
19943b0e 3161 if (iommu_pass_through)
e0fc7e0b
DW
3162 iommu_identity_mapping |= IDENTMAP_ALL;
3163
d3f13810 3164#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3165 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3166#endif
e0fc7e0b 3167
86080ccc
JR
3168 if (iommu_identity_mapping) {
3169 ret = si_domain_init(hw_pass_through);
3170 if (ret)
3171 goto free_iommu;
3172 }
3173
e0fc7e0b
DW
3174 check_tylersburg_isoch();
3175
a87f4918
JR
3176 /*
3177 * If we copied translations from a previous kernel in the kdump
3178 * case, we can not assign the devices to domains now, as that
3179 * would eliminate the old mappings. So skip this part and defer
3180 * the assignment to device driver initialization time.
3181 */
3182 if (copied_tables)
3183 goto domains_done;
3184
ba395927 3185 /*
19943b0e
DW
3186 * If pass through is not set or not enabled, setup context entries for
3187 * identity mappings for rmrr, gfx, and isa and may fall back to static
3188 * identity mapping if iommu_identity_mapping is set.
ba395927 3189 */
19943b0e
DW
3190 if (iommu_identity_mapping) {
3191 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3192 if (ret) {
9f10e5bf 3193 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3194 goto free_iommu;
ba395927
KA
3195 }
3196 }
ba395927 3197 /*
19943b0e
DW
3198 * For each rmrr
3199 * for each dev attached to rmrr
3200 * do
3201 * locate drhd for dev, alloc domain for dev
3202 * allocate free domain
3203 * allocate page table entries for rmrr
3204 * if context not allocated for bus
3205 * allocate and init context
3206 * set present in root table for this bus
3207 * init context with domain, translation etc
3208 * endfor
3209 * endfor
ba395927 3210 */
9f10e5bf 3211 pr_info("Setting RMRR:\n");
19943b0e 3212 for_each_rmrr_units(rmrr) {
b683b230
JL
3213 /* some BIOS lists non-exist devices in DMAR table. */
3214 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3215 i, dev) {
0b9d9753 3216 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3217 if (ret)
9f10e5bf 3218 pr_err("Mapping reserved region failed\n");
ba395927 3219 }
4ed0d3e6 3220 }
49a0429e 3221
19943b0e
DW
3222 iommu_prepare_isa();
3223
a87f4918
JR
3224domains_done:
3225
ba395927
KA
3226 /*
3227 * for each drhd
3228 * enable fault log
3229 * global invalidate context cache
3230 * global invalidate iotlb
3231 * enable translation
3232 */
7c919779 3233 for_each_iommu(iommu, drhd) {
51a63e67
JC
3234 if (drhd->ignored) {
3235 /*
3236 * we always have to disable PMRs or DMA may fail on
3237 * this device
3238 */
3239 if (force_on)
7c919779 3240 iommu_disable_protect_mem_regions(iommu);
ba395927 3241 continue;
51a63e67 3242 }
ba395927
KA
3243
3244 iommu_flush_write_buffer(iommu);
3245
3460a6d9
KA
3246 ret = dmar_set_interrupt(iommu);
3247 if (ret)
989d51fc 3248 goto free_iommu;
3460a6d9 3249
8939ddf6
JR
3250 if (!translation_pre_enabled(iommu))
3251 iommu_enable_translation(iommu);
3252
b94996c9 3253 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3254 }
3255
3256 return 0;
989d51fc
JL
3257
3258free_iommu:
ffebeb46
JL
3259 for_each_active_iommu(iommu, drhd) {
3260 disable_dmar_iommu(iommu);
a868e6b7 3261 free_dmar_iommu(iommu);
ffebeb46 3262 }
9bdc531e 3263 kfree(deferred_flush);
989d51fc 3264free_g_iommus:
d9630fe9 3265 kfree(g_iommus);
989d51fc 3266error:
ba395927
KA
3267 return ret;
3268}
3269
5a5e02a6 3270/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
3271static struct iova *intel_alloc_iova(struct device *dev,
3272 struct dmar_domain *domain,
3273 unsigned long nrpages, uint64_t dma_mask)
ba395927 3274{
ba395927 3275 struct iova *iova = NULL;
ba395927 3276
875764de
DW
3277 /* Restrict dma_mask to the width that the iommu can handle */
3278 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
8f6429c7
RM
3279 /* Ensure we reserve the whole size-aligned region */
3280 nrpages = __roundup_pow_of_two(nrpages);
875764de
DW
3281
3282 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3283 /*
3284 * First try to allocate an io virtual address in
284901a9 3285 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3286		 * from the higher range
ba395927 3287 */
875764de
DW
3288 iova = alloc_iova(&domain->iovad, nrpages,
3289 IOVA_PFN(DMA_BIT_MASK(32)), 1);
3290 if (iova)
3291 return iova;
3292 }
3293 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3294 if (unlikely(!iova)) {
9f10e5bf 3295		pr_err("Allocating %ld-page iova for %s failed\n",
207e3592 3296 nrpages, dev_name(dev));
f76aec76
KA
3297 return NULL;
3298 }
3299
3300 return iova;
3301}
3302
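/*
 * Editor's note, not part of the original file: because nrpages is
 * rounded up to a power of two and alloc_iova() is called with
 * size_aligned set, a 5-page request actually reserves 8 pages and the
 * returned IOVA is 8-page aligned.  Devices with a 64-bit dma_mask still
 * get their first attempt from below 4GiB unless intel_iommu=forcedac
 * was given on the kernel command line.
 */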
d4b709f4 3303static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
3304{
3305 struct dmar_domain *domain;
f76aec76 3306
d4b709f4 3307 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 3308 if (!domain) {
9f10e5bf 3309 pr_err("Allocating domain for %s failed\n",
d4b709f4 3310 dev_name(dev));
4fe05bbc 3311 return NULL;
ba395927
KA
3312 }
3313
f76aec76
KA
3314 return domain;
3315}
3316
d4b709f4 3317static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
3318{
3319 struct device_domain_info *info;
3320
3321 /* No lock here, assumes no domain exit in normal case */
d4b709f4 3322 info = dev->archdata.iommu;
147202aa
DW
3323 if (likely(info))
3324 return info->domain;
3325
3326 return __get_valid_domain_for_dev(dev);
3327}
3328
ecb509ec 3329/* Check if the dev needs to go through non-identity map and unmap process.*/
73676832 3330static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3331{
3332 int found;
3333
3d89194a 3334 if (iommu_dummy(dev))
1e4c64c4
DW
3335 return 1;
3336
2c2e2c38 3337 if (!iommu_identity_mapping)
1e4c64c4 3338 return 0;
2c2e2c38 3339
9b226624 3340 found = identity_mapping(dev);
2c2e2c38 3341 if (found) {
ecb509ec 3342 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3343 return 1;
3344 else {
3345 /*
 3346			 * A 32-bit DMA device is removed from si_domain and falls back
3347 * to non-identity mapping.
3348 */
e6de0f8d 3349 dmar_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3350 pr_info("32bit %s uses non-identity mapping\n",
3351 dev_name(dev));
2c2e2c38
FY
3352 return 0;
3353 }
3354 } else {
3355 /*
 3356		 * In the case of a 64-bit DMA device detached from a VM, the device
3357 * is put into si_domain for identity mapping.
3358 */
ecb509ec 3359 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3360 int ret;
28ccce0d 3361 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3362 if (!ret) {
9f10e5bf
JR
3363 pr_info("64bit %s uses identity mapping\n",
3364 dev_name(dev));
2c2e2c38
FY
3365 return 1;
3366 }
3367 }
3368 }
3369
1e4c64c4 3370 return 0;
2c2e2c38
FY
3371}
3372
5040a918 3373static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3374 size_t size, int dir, u64 dma_mask)
f76aec76 3375{
f76aec76 3376 struct dmar_domain *domain;
5b6985ce 3377 phys_addr_t start_paddr;
f76aec76
KA
3378 struct iova *iova;
3379 int prot = 0;
6865f0d1 3380 int ret;
8c11e798 3381 struct intel_iommu *iommu;
33041ec0 3382 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3383
3384 BUG_ON(dir == DMA_NONE);
2c2e2c38 3385
5040a918 3386 if (iommu_no_mapping(dev))
6865f0d1 3387 return paddr;
f76aec76 3388
5040a918 3389 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3390 if (!domain)
3391 return 0;
3392
8c11e798 3393 iommu = domain_get_iommu(domain);
88cb6a74 3394 size = aligned_nrpages(paddr, size);
f76aec76 3395
5040a918 3396 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3397 if (!iova)
3398 goto error;
3399
ba395927
KA
3400 /*
3401 * Check if DMAR supports zero-length reads on write only
3402 * mappings..
3403 */
3404 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3405 !cap_zlr(iommu->cap))
ba395927
KA
3406 prot |= DMA_PTE_READ;
3407 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3408 prot |= DMA_PTE_WRITE;
3409 /*
6865f0d1 3410	 * paddr - (paddr + size) might be a partial page; we should map the whole
ba395927 3411	 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3412	 * might have two guest_addrs mapping to the same host paddr, but this
ba395927
KA
3413 * is not a big problem
3414 */
0ab36de2 3415 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3416 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3417 if (ret)
3418 goto error;
3419
1f0ef2aa
DW
3420 /* it's a non-present to present mapping. Only flush if caching mode */
3421 if (cap_caching_mode(iommu->cap))
a1ddcbe9
JR
3422 iommu_flush_iotlb_psi(iommu, domain,
3423 mm_to_dma_pfn(iova->pfn_lo),
3424 size, 0, 1);
1f0ef2aa 3425 else
8c11e798 3426 iommu_flush_write_buffer(iommu);
f76aec76 3427
03d6a246
DW
3428 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3429 start_paddr += paddr & ~PAGE_MASK;
3430 return start_paddr;
ba395927 3431
ba395927 3432error:
f76aec76
KA
3433 if (iova)
3434 __free_iova(&domain->iovad, iova);
9f10e5bf 3435 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3436 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3437 return 0;
3438}
3439
ffbbef5c
FT
3440static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3441 unsigned long offset, size_t size,
3442 enum dma_data_direction dir,
3443 struct dma_attrs *attrs)
bb9e6d65 3444{
ffbbef5c 3445 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3446 dir, *dev->dma_mask);
bb9e6d65
FT
3447}
3448
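/*
 * Editor's sketch, not part of intel-iommu.c: drivers never call
 * intel_map_page() directly; it is reached through the generic DMA API
 * via this file's dma_map_ops.  A minimal, hypothetical caller (function
 * name and error handling are invented for illustration):
 */
static dma_addr_t __maybe_unused example_driver_map(struct device *dev,
						    struct page *page,
						    size_t len)
{
	dma_addr_t dma;

	/* ends up in intel_map_page() -> __intel_map_single() above */
	dma = dma_map_page(dev, page, 0, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		return 0;	/* a real driver would propagate an error */

	return dma;		/* an IOVA, or paddr for identity-mapped devices */
}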
5e0d2a6f 3449static void flush_unmaps(void)
3450{
80b20dd8 3451 int i, j;
5e0d2a6f 3452
5e0d2a6f 3453 timer_on = 0;
3454
3455 /* just flush them all */
3456 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3457 struct intel_iommu *iommu = g_iommus[i];
3458 if (!iommu)
3459 continue;
c42d9f32 3460
9dd2fe89
YZ
3461 if (!deferred_flush[i].next)
3462 continue;
3463
78d5f0f5
NA
 3464		/* In caching mode, global flushes are expensive under emulation */
3465 if (!cap_caching_mode(iommu->cap))
3466 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3467 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3468 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3469 unsigned long mask;
3470 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3471 struct dmar_domain *domain = deferred_flush[i].domain[j];
3472
3473 /* On real hardware multiple invalidations are expensive */
3474 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3475 iommu_flush_iotlb_psi(iommu, domain,
a156ef99 3476 iova->pfn_lo, iova_size(iova),
ea8ea460 3477 !deferred_flush[i].freelist[j], 0);
78d5f0f5 3478 else {
a156ef99 3479 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
78d5f0f5
NA
3480 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3481 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3482 }
93a23a72 3483 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3484 if (deferred_flush[i].freelist[j])
3485 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3486 }
9dd2fe89 3487 deferred_flush[i].next = 0;
5e0d2a6f 3488 }
3489
5e0d2a6f 3490 list_size = 0;
5e0d2a6f 3491}
3492
3493static void flush_unmaps_timeout(unsigned long data)
3494{
80b20dd8 3495 unsigned long flags;
3496
3497 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3498 flush_unmaps();
80b20dd8 3499 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3500}
3501
ea8ea460 3502static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3503{
3504 unsigned long flags;
80b20dd8 3505 int next, iommu_id;
8c11e798 3506 struct intel_iommu *iommu;
5e0d2a6f 3507
3508 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3509 if (list_size == HIGH_WATER_MARK)
3510 flush_unmaps();
3511
8c11e798
WH
3512 iommu = domain_get_iommu(dom);
3513 iommu_id = iommu->seq_id;
c42d9f32 3514
80b20dd8 3515 next = deferred_flush[iommu_id].next;
3516 deferred_flush[iommu_id].domain[next] = dom;
3517 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3518 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3519 deferred_flush[iommu_id].next++;
5e0d2a6f 3520
3521 if (!timer_on) {
3522 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3523 timer_on = 1;
3524 }
3525 list_size++;
3526 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3527}
3528
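/*
 * Common unmap path: look up the IOVA covering dev_addr, tear down its
 * page tables, then either flush synchronously (intel_iommu_strict) or
 * defer the IOTLB flush and the freeing via add_unmap().
 */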
d41a4adb 3529static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
ba395927 3530{
f76aec76 3531 struct dmar_domain *domain;
d794dc9b 3532 unsigned long start_pfn, last_pfn;
ba395927 3533 struct iova *iova;
8c11e798 3534 struct intel_iommu *iommu;
ea8ea460 3535 struct page *freelist;
ba395927 3536
73676832 3537 if (iommu_no_mapping(dev))
f76aec76 3538 return;
2c2e2c38 3539
1525a29a 3540 domain = find_domain(dev);
ba395927
KA
3541 BUG_ON(!domain);
3542
8c11e798
WH
3543 iommu = domain_get_iommu(domain);
3544
ba395927 3545 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3546 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3547 (unsigned long long)dev_addr))
ba395927 3548 return;
ba395927 3549
d794dc9b
DW
3550 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3551 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3552
d794dc9b 3553 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3554 dev_name(dev), start_pfn, last_pfn);
ba395927 3555
ea8ea460 3556 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3557
5e0d2a6f 3558 if (intel_iommu_strict) {
a1ddcbe9 3559 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
ea8ea460 3560 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3561 /* free iova */
3562 __free_iova(&domain->iovad, iova);
ea8ea460 3563 dma_free_pagelist(freelist);
5e0d2a6f 3564 } else {
ea8ea460 3565 add_unmap(domain, iova, freelist);
5e0d2a6f 3566 /*
3567 		 * queue up the release of the unmap to save the ~1/6th of the
3568 		 * CPU time used up by the IOTLB flush operation...
3569 */
5e0d2a6f 3570 }
ba395927
KA
3571}
3572
d41a4adb
JL
3573static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3574 size_t size, enum dma_data_direction dir,
3575 struct dma_attrs *attrs)
3576{
3577 intel_unmap(dev, dev_addr);
3578}
3579
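/*
 * Coherent allocations prefer CMA when the caller may sleep; the CMA page
 * is dropped again if the device bypasses the IOMMU and the page does not
 * fit under its coherent DMA mask.  GFP_DMA/GFP_DMA32 are only applied in
 * the bypass case, since translated devices can use any physical page.
 */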
5040a918 3580static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3581 dma_addr_t *dma_handle, gfp_t flags,
3582 struct dma_attrs *attrs)
ba395927 3583{
36746436 3584 struct page *page = NULL;
ba395927
KA
3585 int order;
3586
5b6985ce 3587 size = PAGE_ALIGN(size);
ba395927 3588 order = get_order(size);
e8bb910d 3589
5040a918 3590 if (!iommu_no_mapping(dev))
e8bb910d 3591 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3592 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3593 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3594 flags |= GFP_DMA;
3595 else
3596 flags |= GFP_DMA32;
3597 }
ba395927 3598
36746436
AM
3599 if (flags & __GFP_WAIT) {
3600 unsigned int count = size >> PAGE_SHIFT;
3601
3602 page = dma_alloc_from_contiguous(dev, count, order);
3603 if (page && iommu_no_mapping(dev) &&
3604 page_to_phys(page) + size > dev->coherent_dma_mask) {
3605 dma_release_from_contiguous(dev, page, count);
3606 page = NULL;
3607 }
3608 }
3609
3610 if (!page)
3611 page = alloc_pages(flags, order);
3612 if (!page)
ba395927 3613 return NULL;
36746436 3614 memset(page_address(page), 0, size);
ba395927 3615
36746436 3616 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3617 DMA_BIDIRECTIONAL,
5040a918 3618 dev->coherent_dma_mask);
ba395927 3619 if (*dma_handle)
36746436
AM
3620 return page_address(page);
3621 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3622 __free_pages(page, order);
3623
ba395927
KA
3624 return NULL;
3625}
3626
5040a918 3627static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3628 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3629{
3630 int order;
36746436 3631 struct page *page = virt_to_page(vaddr);
ba395927 3632
5b6985ce 3633 size = PAGE_ALIGN(size);
ba395927
KA
3634 order = get_order(size);
3635
d41a4adb 3636 intel_unmap(dev, dma_handle);
36746436
AM
3637 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3638 __free_pages(page, order);
ba395927
KA
3639}
3640
5040a918 3641static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3642 int nelems, enum dma_data_direction dir,
3643 struct dma_attrs *attrs)
ba395927 3644{
d41a4adb 3645 intel_unmap(dev, sglist[0].dma_address);
ba395927
KA
3646}
3647
ba395927 3648static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3649 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3650{
3651 int i;
c03ab37c 3652 struct scatterlist *sg;
ba395927 3653
c03ab37c 3654 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3655 BUG_ON(!sg_page(sg));
db0fa0cb 3656 sg->dma_address = sg_phys(sg);
c03ab37c 3657 sg->dma_length = sg->length;
ba395927
KA
3658 }
3659 return nelems;
3660}
3661
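/*
 * Map a scatterlist: a single IOVA range large enough for all segments is
 * allocated up front and domain_sg_mapping() fills in the page tables; on
 * failure the partially built tables and the IOVA are torn down again.
 */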
5040a918 3662static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3663 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3664{
ba395927 3665 int i;
ba395927 3666 struct dmar_domain *domain;
f76aec76
KA
3667 size_t size = 0;
3668 int prot = 0;
f76aec76
KA
3669 struct iova *iova = NULL;
3670 int ret;
c03ab37c 3671 struct scatterlist *sg;
b536d24d 3672 unsigned long start_vpfn;
8c11e798 3673 struct intel_iommu *iommu;
ba395927
KA
3674
3675 BUG_ON(dir == DMA_NONE);
5040a918
DW
3676 if (iommu_no_mapping(dev))
3677 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3678
5040a918 3679 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3680 if (!domain)
3681 return 0;
3682
8c11e798
WH
3683 iommu = domain_get_iommu(domain);
3684
b536d24d 3685 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3686 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3687
5040a918
DW
3688 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3689 *dev->dma_mask);
f76aec76 3690 if (!iova) {
c03ab37c 3691 sglist->dma_length = 0;
f76aec76
KA
3692 return 0;
3693 }
3694
3695 /*
3696 * Check if DMAR supports zero-length reads on write only
3697 	 * mappings.
3698 */
3699 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
8c11e798 3700 !cap_zlr(iommu->cap))
f76aec76
KA
3701 prot |= DMA_PTE_READ;
3702 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3703 prot |= DMA_PTE_WRITE;
3704
b536d24d 3705 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3706
f532959b 3707 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3708 if (unlikely(ret)) {
e1605495
DW
3709 dma_pte_free_pagetable(domain, start_vpfn,
3710 start_vpfn + size - 1);
e1605495
DW
3711 __free_iova(&domain->iovad, iova);
3712 return 0;
ba395927
KA
3713 }
3714
1f0ef2aa
DW
3715 /* it's a non-present to present mapping. Only flush if caching mode */
3716 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3717 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
1f0ef2aa 3718 else
8c11e798 3719 iommu_flush_write_buffer(iommu);
1f0ef2aa 3720
ba395927
KA
3721 return nelems;
3722}
3723
dfb805e8
FT
3724static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3725{
3726 return !dma_addr;
3727}
3728
160c1d8e 3729struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3730 .alloc = intel_alloc_coherent,
3731 .free = intel_free_coherent,
ba395927
KA
3732 .map_sg = intel_map_sg,
3733 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3734 .map_page = intel_map_page,
3735 .unmap_page = intel_unmap_page,
dfb805e8 3736 .mapping_error = intel_mapping_error,
ba395927
KA
3737};
3738
3739static inline int iommu_domain_cache_init(void)
3740{
3741 int ret = 0;
3742
3743 iommu_domain_cache = kmem_cache_create("iommu_domain",
3744 sizeof(struct dmar_domain),
3745 0,
3746 					 SLAB_HWCACHE_ALIGN,
3748 					 NULL);
3749 if (!iommu_domain_cache) {
9f10e5bf 3750 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3751 ret = -ENOMEM;
3752 }
3753
3754 return ret;
3755}
3756
3757static inline int iommu_devinfo_cache_init(void)
3758{
3759 int ret = 0;
3760
3761 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3762 sizeof(struct device_domain_info),
3763 0,
3764 SLAB_HWCACHE_ALIGN,
ba395927
KA
3765 NULL);
3766 if (!iommu_devinfo_cache) {
9f10e5bf 3767 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3768 ret = -ENOMEM;
3769 }
3770
3771 return ret;
3772}
3773
ba395927
KA
3774static int __init iommu_init_mempool(void)
3775{
3776 int ret;
ae1ff3d6 3777 ret = iova_cache_get();
ba395927
KA
3778 if (ret)
3779 return ret;
3780
3781 ret = iommu_domain_cache_init();
3782 if (ret)
3783 goto domain_error;
3784
3785 ret = iommu_devinfo_cache_init();
3786 if (!ret)
3787 return ret;
3788
3789 kmem_cache_destroy(iommu_domain_cache);
3790domain_error:
ae1ff3d6 3791 iova_cache_put();
ba395927
KA
3792
3793 return -ENOMEM;
3794}
3795
3796static void __init iommu_exit_mempool(void)
3797{
3798 kmem_cache_destroy(iommu_devinfo_cache);
3799 kmem_cache_destroy(iommu_domain_cache);
ae1ff3d6 3800 iova_cache_put();
ba395927
KA
3801}
3802
556ab45f
DW
3803static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3804{
3805 struct dmar_drhd_unit *drhd;
3806 u32 vtbar;
3807 int rc;
3808
3809 /* We know that this device on this chipset has its own IOMMU.
3810 * If we find it under a different IOMMU, then the BIOS is lying
3811 * to us. Hope that the IOMMU for this device is actually
3812 * disabled, and it needs no translation...
3813 */
3814 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3815 if (rc) {
3816 /* "can't" happen */
3817 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3818 return;
3819 }
3820 vtbar &= 0xffff0000;
3821
3822 	/* we know that this IOMMU should be at offset 0xa000 from vtbar */
3823 drhd = dmar_find_matched_drhd_unit(pdev);
3824 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3825 TAINT_FIRMWARE_WORKAROUND,
3826 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3827 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3828}
3829DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3830
ba395927
KA
3831static void __init init_no_remapping_devices(void)
3832{
3833 struct dmar_drhd_unit *drhd;
832bd858 3834 struct device *dev;
b683b230 3835 int i;
ba395927
KA
3836
3837 for_each_drhd_unit(drhd) {
3838 if (!drhd->include_all) {
b683b230
JL
3839 for_each_active_dev_scope(drhd->devices,
3840 drhd->devices_cnt, i, dev)
3841 break;
832bd858 3842 /* ignore DMAR unit if no devices exist */
ba395927
KA
3843 if (i == drhd->devices_cnt)
3844 drhd->ignored = 1;
3845 }
3846 }
3847
7c919779 3848 for_each_active_drhd_unit(drhd) {
7c919779 3849 if (drhd->include_all)
ba395927
KA
3850 continue;
3851
b683b230
JL
3852 for_each_active_dev_scope(drhd->devices,
3853 drhd->devices_cnt, i, dev)
832bd858 3854 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3855 break;
ba395927
KA
3856 if (i < drhd->devices_cnt)
3857 continue;
3858
c0771df8
DW
3859 /* This IOMMU has *only* gfx devices. Either bypass it or
3860 set the gfx_mapped flag, as appropriate */
3861 if (dmar_map_gfx) {
3862 intel_iommu_gfx_mapped = 1;
3863 } else {
3864 drhd->ignored = 1;
b683b230
JL
3865 for_each_active_dev_scope(drhd->devices,
3866 drhd->devices_cnt, i, dev)
832bd858 3867 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3868 }
3869 }
3870}
3871
f59c7b69
FY
3872#ifdef CONFIG_SUSPEND
3873static int init_iommu_hw(void)
3874{
3875 struct dmar_drhd_unit *drhd;
3876 struct intel_iommu *iommu = NULL;
3877
3878 for_each_active_iommu(iommu, drhd)
3879 if (iommu->qi)
3880 dmar_reenable_qi(iommu);
3881
b779260b
JC
3882 for_each_iommu(iommu, drhd) {
3883 if (drhd->ignored) {
3884 /*
3885 * we always have to disable PMRs or DMA may fail on
3886 * this device
3887 */
3888 if (force_on)
3889 iommu_disable_protect_mem_regions(iommu);
3890 continue;
3891 }
3892
f59c7b69
FY
3893 iommu_flush_write_buffer(iommu);
3894
3895 iommu_set_root_entry(iommu);
3896
3897 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3898 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
3899 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3900 iommu_enable_translation(iommu);
b94996c9 3901 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3902 }
3903
3904 return 0;
3905}
3906
3907static void iommu_flush_all(void)
3908{
3909 struct dmar_drhd_unit *drhd;
3910 struct intel_iommu *iommu;
3911
3912 for_each_active_iommu(iommu, drhd) {
3913 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3914 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3915 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3916 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3917 }
3918}
3919
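/*
 * Suspend path: flush everything, disable translation and save the
 * fault-event registers (FECTL/FEDATA/FEADDR/FEUADDR) of every active
 * IOMMU; iommu_resume() re-initialises the hardware and restores them.
 */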
134fac3f 3920static int iommu_suspend(void)
f59c7b69
FY
3921{
3922 struct dmar_drhd_unit *drhd;
3923 struct intel_iommu *iommu = NULL;
3924 unsigned long flag;
3925
3926 for_each_active_iommu(iommu, drhd) {
3927 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3928 GFP_ATOMIC);
3929 if (!iommu->iommu_state)
3930 goto nomem;
3931 }
3932
3933 iommu_flush_all();
3934
3935 for_each_active_iommu(iommu, drhd) {
3936 iommu_disable_translation(iommu);
3937
1f5b3c3f 3938 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3939
3940 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3941 readl(iommu->reg + DMAR_FECTL_REG);
3942 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3943 readl(iommu->reg + DMAR_FEDATA_REG);
3944 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3945 readl(iommu->reg + DMAR_FEADDR_REG);
3946 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3947 readl(iommu->reg + DMAR_FEUADDR_REG);
3948
1f5b3c3f 3949 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3950 }
3951 return 0;
3952
3953nomem:
3954 for_each_active_iommu(iommu, drhd)
3955 kfree(iommu->iommu_state);
3956
3957 return -ENOMEM;
3958}
3959
134fac3f 3960static void iommu_resume(void)
f59c7b69
FY
3961{
3962 struct dmar_drhd_unit *drhd;
3963 struct intel_iommu *iommu = NULL;
3964 unsigned long flag;
3965
3966 if (init_iommu_hw()) {
b779260b
JC
3967 if (force_on)
3968 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3969 else
3970 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3971 return;
f59c7b69
FY
3972 }
3973
3974 for_each_active_iommu(iommu, drhd) {
3975
1f5b3c3f 3976 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3977
3978 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3979 iommu->reg + DMAR_FECTL_REG);
3980 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3981 iommu->reg + DMAR_FEDATA_REG);
3982 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3983 iommu->reg + DMAR_FEADDR_REG);
3984 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3985 iommu->reg + DMAR_FEUADDR_REG);
3986
1f5b3c3f 3987 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3988 }
3989
3990 for_each_active_iommu(iommu, drhd)
3991 kfree(iommu->iommu_state);
f59c7b69
FY
3992}
3993
134fac3f 3994static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3995 .resume = iommu_resume,
3996 .suspend = iommu_suspend,
3997};
3998
134fac3f 3999static void __init init_iommu_pm_ops(void)
f59c7b69 4000{
134fac3f 4001 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
4002}
4003
4004#else
99592ba4 4005static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
4006#endif /* CONFIG_PM */
4007
318fe7df 4008
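/*
 * Parse one RMRR entry from the DMAR table into a dmar_rmrr_unit
 * (base/end address plus device scope) and add it to dmar_rmrr_units.
 */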
c2a0b538 4009int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
4010{
4011 struct acpi_dmar_reserved_memory *rmrr;
4012 struct dmar_rmrr_unit *rmrru;
4013
4014 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4015 if (!rmrru)
4016 return -ENOMEM;
4017
4018 rmrru->hdr = header;
4019 rmrr = (struct acpi_dmar_reserved_memory *)header;
4020 rmrru->base_address = rmrr->base_address;
4021 rmrru->end_address = rmrr->end_address;
2e455289
JL
4022 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4023 ((void *)rmrr) + rmrr->header.length,
4024 &rmrru->devices_cnt);
4025 if (rmrru->devices_cnt && rmrru->devices == NULL) {
4026 kfree(rmrru);
4027 return -ENOMEM;
4028 }
318fe7df 4029
2e455289 4030 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 4031
2e455289 4032 return 0;
318fe7df
SS
4033}
4034
6b197249
JL
4035static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4036{
4037 struct dmar_atsr_unit *atsru;
4038 struct acpi_dmar_atsr *tmp;
4039
4040 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4041 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4042 if (atsr->segment != tmp->segment)
4043 continue;
4044 if (atsr->header.length != tmp->header.length)
4045 continue;
4046 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4047 return atsru;
4048 }
4049
4050 return NULL;
4051}
4052
4053int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
4054{
4055 struct acpi_dmar_atsr *atsr;
4056 struct dmar_atsr_unit *atsru;
4057
6b197249
JL
4058 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4059 return 0;
4060
318fe7df 4061 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
4062 atsru = dmar_find_atsr(atsr);
4063 if (atsru)
4064 return 0;
4065
4066 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
4067 if (!atsru)
4068 return -ENOMEM;
4069
6b197249
JL
4070 /*
4071 * If memory is allocated from slab by ACPI _DSM method, we need to
4072 * copy the memory content because the memory buffer will be freed
4073 * on return.
4074 */
4075 atsru->hdr = (void *)(atsru + 1);
4076 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 4077 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
4078 if (!atsru->include_all) {
4079 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4080 (void *)atsr + atsr->header.length,
4081 &atsru->devices_cnt);
4082 if (atsru->devices_cnt && atsru->devices == NULL) {
4083 kfree(atsru);
4084 return -ENOMEM;
4085 }
4086 }
318fe7df 4087
0e242612 4088 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4089
4090 return 0;
4091}
4092
9bdc531e
JL
4093static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4094{
4095 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4096 kfree(atsru);
4097}
4098
6b197249
JL
4099int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4100{
4101 struct acpi_dmar_atsr *atsr;
4102 struct dmar_atsr_unit *atsru;
4103
4104 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4105 atsru = dmar_find_atsr(atsr);
4106 if (atsru) {
4107 list_del_rcu(&atsru->list);
4108 synchronize_rcu();
4109 intel_iommu_free_atsr(atsru);
4110 }
4111
4112 return 0;
4113}
4114
4115int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4116{
4117 int i;
4118 struct device *dev;
4119 struct acpi_dmar_atsr *atsr;
4120 struct dmar_atsr_unit *atsru;
4121
4122 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4123 atsru = dmar_find_atsr(atsr);
4124 if (!atsru)
4125 return 0;
4126
4127 if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
4128 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4129 i, dev)
4130 return -EBUSY;
4131
4132 return 0;
4133}
4134
ffebeb46
JL
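/*
 * Bring up a hot-added DMAR unit: verify it supports the features the
 * running configuration relies on (pass-through, snooping, superpages),
 * set up its domain bookkeeping and root entry, then enable QI,
 * interrupts and translation unless the unit is ignored.
 */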
4135static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4136{
4137 int sp, ret = 0;
4138 struct intel_iommu *iommu = dmaru->iommu;
4139
4140 if (g_iommus[iommu->seq_id])
4141 return 0;
4142
4143 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4144 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4145 iommu->name);
4146 return -ENXIO;
4147 }
4148 if (!ecap_sc_support(iommu->ecap) &&
4149 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4150 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4151 iommu->name);
4152 return -ENXIO;
4153 }
4154 sp = domain_update_iommu_superpage(iommu) - 1;
4155 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4156 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4157 iommu->name);
4158 return -ENXIO;
4159 }
4160
4161 /*
4162 * Disable translation if already enabled prior to OS handover.
4163 */
4164 if (iommu->gcmd & DMA_GCMD_TE)
4165 iommu_disable_translation(iommu);
4166
4167 g_iommus[iommu->seq_id] = iommu;
4168 ret = iommu_init_domains(iommu);
4169 if (ret == 0)
4170 ret = iommu_alloc_root_entry(iommu);
4171 if (ret)
4172 goto out;
4173
8a94ade4
DW
4174#ifdef CONFIG_INTEL_IOMMU_SVM
4175 if (pasid_enabled(iommu))
4176 intel_svm_alloc_pasid_tables(iommu);
4177#endif
4178
ffebeb46
JL
4179 if (dmaru->ignored) {
4180 /*
4181 * we always have to disable PMRs or DMA may fail on this device
4182 */
4183 if (force_on)
4184 iommu_disable_protect_mem_regions(iommu);
4185 return 0;
4186 }
4187
4188 intel_iommu_init_qi(iommu);
4189 iommu_flush_write_buffer(iommu);
4190 ret = dmar_set_interrupt(iommu);
4191 if (ret)
4192 goto disable_iommu;
4193
4194 iommu_set_root_entry(iommu);
4195 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4196 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4197 iommu_enable_translation(iommu);
4198
ffebeb46
JL
4199 iommu_disable_protect_mem_regions(iommu);
4200 return 0;
4201
4202disable_iommu:
4203 disable_dmar_iommu(iommu);
4204out:
4205 free_dmar_iommu(iommu);
4206 return ret;
4207}
4208
6b197249
JL
4209int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4210{
ffebeb46
JL
4211 int ret = 0;
4212 struct intel_iommu *iommu = dmaru->iommu;
4213
4214 if (!intel_iommu_enabled)
4215 return 0;
4216 if (iommu == NULL)
4217 return -EINVAL;
4218
4219 if (insert) {
4220 ret = intel_iommu_add(dmaru);
4221 } else {
4222 disable_dmar_iommu(iommu);
4223 free_dmar_iommu(iommu);
4224 }
4225
4226 return ret;
6b197249
JL
4227}
4228
9bdc531e
JL
4229static void intel_iommu_free_dmars(void)
4230{
4231 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4232 struct dmar_atsr_unit *atsru, *atsr_n;
4233
4234 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4235 list_del(&rmrru->list);
4236 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4237 kfree(rmrru);
318fe7df
SS
4238 }
4239
9bdc531e
JL
4240 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4241 list_del(&atsru->list);
4242 intel_iommu_free_atsr(atsru);
4243 }
318fe7df
SS
4244}
4245
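/*
 * Decide whether ATS may be used for @dev: walk up to its root port and
 * return 1 if an ATSR unit for the device's segment lists that root port
 * (or is include_all).  Integrated endpoints with no upstream bridge are
 * always allowed; devices behind conventional PCI bridges never are.
 */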
4246int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4247{
b683b230 4248 int i, ret = 1;
318fe7df 4249 struct pci_bus *bus;
832bd858
DW
4250 struct pci_dev *bridge = NULL;
4251 struct device *tmp;
318fe7df
SS
4252 struct acpi_dmar_atsr *atsr;
4253 struct dmar_atsr_unit *atsru;
4254
4255 dev = pci_physfn(dev);
318fe7df 4256 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4257 bridge = bus->self;
d14053b3
DW
4258 /* If it's an integrated device, allow ATS */
4259 if (!bridge)
4260 return 1;
4261 /* Connected via non-PCIe: no ATS */
4262 if (!pci_is_pcie(bridge) ||
62f87c0e 4263 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4264 return 0;
d14053b3 4265 /* If we found the root port, look it up in the ATSR */
b5f82ddf 4266 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4267 break;
318fe7df
SS
4268 }
4269
0e242612 4270 rcu_read_lock();
b5f82ddf
JL
4271 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4272 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4273 if (atsr->segment != pci_domain_nr(dev->bus))
4274 continue;
4275
b683b230 4276 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4277 if (tmp == &bridge->dev)
b683b230 4278 goto out;
b5f82ddf
JL
4279
4280 if (atsru->include_all)
b683b230 4281 goto out;
b5f82ddf 4282 }
b683b230
JL
4283 ret = 0;
4284out:
0e242612 4285 rcu_read_unlock();
318fe7df 4286
b683b230 4287 return ret;
318fe7df
SS
4288}
4289
59ce0515
JL
4290int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4291{
4292 int ret = 0;
4293 struct dmar_rmrr_unit *rmrru;
4294 struct dmar_atsr_unit *atsru;
4295 struct acpi_dmar_atsr *atsr;
4296 struct acpi_dmar_reserved_memory *rmrr;
4297
4298 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4299 return 0;
4300
4301 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4302 rmrr = container_of(rmrru->hdr,
4303 struct acpi_dmar_reserved_memory, header);
4304 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4305 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4306 ((void *)rmrr) + rmrr->header.length,
4307 rmrr->segment, rmrru->devices,
4308 rmrru->devices_cnt);
27e24950 4310 			if (ret < 0)
59ce0515
JL
4310 return ret;
4311 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
27e24950
JL
4312 dmar_remove_dev_scope(info, rmrr->segment,
4313 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4314 }
4315 }
4316
4317 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4318 if (atsru->include_all)
4319 continue;
4320
4321 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4322 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4323 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4324 (void *)atsr + atsr->header.length,
4325 atsr->segment, atsru->devices,
4326 atsru->devices_cnt);
4327 if (ret > 0)
4328 break;
4329 			else if (ret < 0)
4330 return ret;
4331 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4332 if (dmar_remove_dev_scope(info, atsr->segment,
4333 atsru->devices, atsru->devices_cnt))
4334 break;
4335 }
4336 }
4337
4338 return 0;
4339}
4340
99dcaded
FY
4341/*
4342 * Here we only respond to a device being unbound from its driver.
4343 *
4344 * A newly added device is not attached to its DMAR domain here yet; that
4345 * happens when the device is first mapped to an IOVA.
4346 */
4347static int device_notifier(struct notifier_block *nb,
4348 unsigned long action, void *data)
4349{
4350 struct device *dev = data;
99dcaded
FY
4351 struct dmar_domain *domain;
4352
3d89194a 4353 if (iommu_dummy(dev))
44cd613c
DW
4354 return 0;
4355
1196c2fb 4356 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4357 return 0;
4358
1525a29a 4359 domain = find_domain(dev);
99dcaded
FY
4360 if (!domain)
4361 return 0;
4362
e6de0f8d 4363 dmar_remove_one_dev_info(domain, dev);
ab8dfe25 4364 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4365 domain_exit(domain);
a97590e5 4366
99dcaded
FY
4367 return 0;
4368}
4369
4370static struct notifier_block device_nb = {
4371 .notifier_call = device_notifier,
4372};
4373
75f05569
JL
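/*
 * Keep the si_domain identity map in sync with memory hotplug: memory
 * going online is identity-mapped; on offline the corresponding IOVA
 * range is split out, unmapped and flushed on every active IOMMU.
 */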
4374static int intel_iommu_memory_notifier(struct notifier_block *nb,
4375 unsigned long val, void *v)
4376{
4377 struct memory_notify *mhp = v;
4378 unsigned long long start, end;
4379 unsigned long start_vpfn, last_vpfn;
4380
4381 switch (val) {
4382 case MEM_GOING_ONLINE:
4383 start = mhp->start_pfn << PAGE_SHIFT;
4384 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4385 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4386 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4387 start, end);
4388 return NOTIFY_BAD;
4389 }
4390 break;
4391
4392 case MEM_OFFLINE:
4393 case MEM_CANCEL_ONLINE:
4394 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4395 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4396 while (start_vpfn <= last_vpfn) {
4397 struct iova *iova;
4398 struct dmar_drhd_unit *drhd;
4399 struct intel_iommu *iommu;
ea8ea460 4400 struct page *freelist;
75f05569
JL
4401
4402 iova = find_iova(&si_domain->iovad, start_vpfn);
4403 if (iova == NULL) {
9f10e5bf 4404 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4405 start_vpfn);
4406 break;
4407 }
4408
4409 iova = split_and_remove_iova(&si_domain->iovad, iova,
4410 start_vpfn, last_vpfn);
4411 if (iova == NULL) {
9f10e5bf 4412 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4413 start_vpfn, last_vpfn);
4414 return NOTIFY_BAD;
4415 }
4416
ea8ea460
DW
4417 freelist = domain_unmap(si_domain, iova->pfn_lo,
4418 iova->pfn_hi);
4419
75f05569
JL
4420 rcu_read_lock();
4421 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4422 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4423 iova->pfn_lo, iova_size(iova),
ea8ea460 4424 !freelist, 0);
75f05569 4425 rcu_read_unlock();
ea8ea460 4426 dma_free_pagelist(freelist);
75f05569
JL
4427
4428 start_vpfn = iova->pfn_hi + 1;
4429 free_iova_mem(iova);
4430 }
4431 break;
4432 }
4433
4434 return NOTIFY_OK;
4435}
4436
4437static struct notifier_block intel_iommu_memory_nb = {
4438 .notifier_call = intel_iommu_memory_notifier,
4439 .priority = 0
4440};
4441
a5459cfe
AW
4442
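/*
 * Read-only sysfs attributes for each DMAR unit, exposed via the
 * "intel-iommu" attribute group of the iommu class device (typically
 * something like /sys/class/iommu/dmar0/intel-iommu/cap, exact path may
 * vary): hardware version, register base, cap/ecap and domain usage.
 */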
4443static ssize_t intel_iommu_show_version(struct device *dev,
4444 struct device_attribute *attr,
4445 char *buf)
4446{
4447 struct intel_iommu *iommu = dev_get_drvdata(dev);
4448 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4449 return sprintf(buf, "%d:%d\n",
4450 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4451}
4452static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4453
4454static ssize_t intel_iommu_show_address(struct device *dev,
4455 struct device_attribute *attr,
4456 char *buf)
4457{
4458 struct intel_iommu *iommu = dev_get_drvdata(dev);
4459 return sprintf(buf, "%llx\n", iommu->reg_phys);
4460}
4461static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4462
4463static ssize_t intel_iommu_show_cap(struct device *dev,
4464 struct device_attribute *attr,
4465 char *buf)
4466{
4467 struct intel_iommu *iommu = dev_get_drvdata(dev);
4468 return sprintf(buf, "%llx\n", iommu->cap);
4469}
4470static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4471
4472static ssize_t intel_iommu_show_ecap(struct device *dev,
4473 struct device_attribute *attr,
4474 char *buf)
4475{
4476 struct intel_iommu *iommu = dev_get_drvdata(dev);
4477 return sprintf(buf, "%llx\n", iommu->ecap);
4478}
4479static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4480
2238c082
AW
4481static ssize_t intel_iommu_show_ndoms(struct device *dev,
4482 struct device_attribute *attr,
4483 char *buf)
4484{
4485 struct intel_iommu *iommu = dev_get_drvdata(dev);
4486 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4487}
4488static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4489
4490static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4491 struct device_attribute *attr,
4492 char *buf)
4493{
4494 struct intel_iommu *iommu = dev_get_drvdata(dev);
4495 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4496 cap_ndoms(iommu->cap)));
4497}
4498static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4499
a5459cfe
AW
4500static struct attribute *intel_iommu_attrs[] = {
4501 &dev_attr_version.attr,
4502 &dev_attr_address.attr,
4503 &dev_attr_cap.attr,
4504 &dev_attr_ecap.attr,
2238c082
AW
4505 &dev_attr_domains_supported.attr,
4506 &dev_attr_domains_used.attr,
a5459cfe
AW
4507 NULL,
4508};
4509
4510static struct attribute_group intel_iommu_group = {
4511 .name = "intel-iommu",
4512 .attrs = intel_iommu_attrs,
4513};
4514
4515const struct attribute_group *intel_iommu_groups[] = {
4516 &intel_iommu_group,
4517 NULL,
4518};
4519
ba395927
KA
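/*
 * Main entry point: parse the DMAR table and device scopes, initialise
 * the DMAR units, install intel_dma_ops, register the IOMMU ops with the
 * PCI bus and hook up the device and memory-hotplug notifiers.
 */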
4520int __init intel_iommu_init(void)
4521{
9bdc531e 4522 int ret = -ENODEV;
3a93c841 4523 struct dmar_drhd_unit *drhd;
7c919779 4524 struct intel_iommu *iommu;
ba395927 4525
a59b50e9
JC
4526 /* VT-d is required for a TXT/tboot launch, so enforce that */
4527 force_on = tboot_force_iommu();
4528
3a5670e8
JL
4529 if (iommu_init_mempool()) {
4530 if (force_on)
4531 panic("tboot: Failed to initialize iommu memory\n");
4532 return -ENOMEM;
4533 }
4534
4535 down_write(&dmar_global_lock);
a59b50e9
JC
4536 if (dmar_table_init()) {
4537 if (force_on)
4538 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4539 goto out_free_dmar;
a59b50e9 4540 }
ba395927 4541
c2c7286a 4542 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4543 if (force_on)
4544 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4545 goto out_free_dmar;
a59b50e9 4546 }
1886e8a9 4547
75f1cdf1 4548 if (no_iommu || dmar_disabled)
9bdc531e 4549 goto out_free_dmar;
2ae21010 4550
318fe7df 4551 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4552 pr_info("No RMRR found\n");
318fe7df
SS
4553
4554 if (list_empty(&dmar_atsr_units))
9f10e5bf 4555 pr_info("No ATSR found\n");
318fe7df 4556
51a63e67
JC
4557 if (dmar_init_reserved_ranges()) {
4558 if (force_on)
4559 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4560 goto out_free_reserved_range;
51a63e67 4561 }
ba395927
KA
4562
4563 init_no_remapping_devices();
4564
b779260b 4565 ret = init_dmars();
ba395927 4566 if (ret) {
a59b50e9
JC
4567 if (force_on)
4568 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4569 pr_err("Initialization failed\n");
9bdc531e 4570 goto out_free_reserved_range;
ba395927 4571 }
3a5670e8 4572 up_write(&dmar_global_lock);
9f10e5bf 4573 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4574
5e0d2a6f 4575 init_timer(&unmap_timer);
75f1cdf1
FT
4576#ifdef CONFIG_SWIOTLB
4577 swiotlb = 0;
4578#endif
19943b0e 4579 dma_ops = &intel_dma_ops;
4ed0d3e6 4580
134fac3f 4581 init_iommu_pm_ops();
a8bcbb0d 4582
a5459cfe
AW
4583 for_each_active_iommu(iommu, drhd)
4584 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4585 intel_iommu_groups,
2439d4aa 4586 "%s", iommu->name);
a5459cfe 4587
4236d97d 4588 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4589 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4590 if (si_domain && !hw_pass_through)
4591 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4592
8bc1f85c
ED
4593 intel_iommu_enabled = 1;
4594
ba395927 4595 return 0;
9bdc531e
JL
4596
4597out_free_reserved_range:
4598 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4599out_free_dmar:
4600 intel_iommu_free_dmars();
3a5670e8
JL
4601 up_write(&dmar_global_lock);
4602 iommu_exit_mempool();
9bdc531e 4603 return ret;
ba395927 4604}
e820482c 4605
2452d9db 4606static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4607{
4608 struct intel_iommu *iommu = opaque;
4609
2452d9db 4610 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4611 return 0;
4612}
4613
4614/*
4615 * NB - intel-iommu lacks any sort of reference counting for the users of
4616 * dependent devices. If multiple endpoints have intersecting dependent
4617 * devices, unbinding the driver from any one of them will possibly leave
4618 * the others unable to operate.
4619 */
2452d9db 4620static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4621{
0bcb3e28 4622 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4623 return;
4624
2452d9db 4625 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4626}
4627
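/*
 * Tear down one device/domain binding with device_domain_lock held:
 * disable the device IOTLB, clear the context entries for every DMA
 * alias, unlink the info, detach the domain from this IOMMU and free
 * the info structure.
 */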
127c7615 4628static void __dmar_remove_one_dev_info(struct device_domain_info *info)
c7151a8d 4629{
c7151a8d
WH
4630 struct intel_iommu *iommu;
4631 unsigned long flags;
c7151a8d 4632
55d94043
JR
4633 assert_spin_locked(&device_domain_lock);
4634
127c7615 4635 if (WARN_ON(!info))
c7151a8d
WH
4636 return;
4637
127c7615 4638 iommu = info->iommu;
c7151a8d 4639
127c7615
JR
4640 if (info->dev) {
4641 iommu_disable_dev_iotlb(info);
4642 domain_context_clear(iommu, info->dev);
4643 }
c7151a8d 4644
b608ac3b 4645 unlink_domain_info(info);
c7151a8d 4646
d160aca5 4647 spin_lock_irqsave(&iommu->lock, flags);
127c7615 4648 domain_detach_iommu(info->domain, iommu);
d160aca5 4649 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 4650
127c7615 4651 free_devinfo_mem(info);
c7151a8d 4652}
c7151a8d 4653
55d94043
JR
4654static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4655 struct device *dev)
4656{
127c7615 4657 struct device_domain_info *info;
55d94043 4658 unsigned long flags;
3e7abe25 4659
55d94043 4660 spin_lock_irqsave(&device_domain_lock, flags);
127c7615
JR
4661 info = dev->archdata.iommu;
4662 __dmar_remove_one_dev_info(info);
55d94043 4663 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d
WH
4664}
4665
2c2e2c38 4666static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4667{
4668 int adjust_width;
4669
0fb5fe87
RM
4670 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4671 DMA_32BIT_PFN);
5e98c4b1
WH
4672 domain_reserve_special_ranges(domain);
4673
4674 /* calculate AGAW */
4675 domain->gaw = guest_width;
4676 adjust_width = guestwidth_to_adjustwidth(guest_width);
4677 domain->agaw = width_to_agaw(adjust_width);
4678
5e98c4b1 4679 domain->iommu_coherency = 0;
c5b15255 4680 domain->iommu_snooping = 0;
6dd9a7c7 4681 domain->iommu_superpage = 0;
fe40f1e0 4682 domain->max_addr = 0;
5e98c4b1
WH
4683
4684 /* always allocate the top pgd */
4c923d47 4685 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4686 if (!domain->pgd)
4687 return -ENOMEM;
4688 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4689 return 0;
4690}
4691
00a77deb 4692static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4693{
5d450806 4694 struct dmar_domain *dmar_domain;
00a77deb
JR
4695 struct iommu_domain *domain;
4696
4697 if (type != IOMMU_DOMAIN_UNMANAGED)
4698 return NULL;
38717946 4699
ab8dfe25 4700 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4701 if (!dmar_domain) {
9f10e5bf 4702 pr_err("Can't allocate dmar_domain\n");
00a77deb 4703 return NULL;
38717946 4704 }
2c2e2c38 4705 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 4706 pr_err("Domain initialization failed\n");
92d03cc8 4707 domain_exit(dmar_domain);
00a77deb 4708 return NULL;
38717946 4709 }
8140a95d 4710 domain_update_iommu_cap(dmar_domain);
faa3d6f5 4711
00a77deb 4712 domain = &dmar_domain->domain;
8a0e715b
JR
4713 domain->geometry.aperture_start = 0;
4714 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4715 domain->geometry.force_aperture = true;
4716
00a77deb 4717 return domain;
38717946 4718}
38717946 4719
00a77deb 4720static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4721{
00a77deb 4722 domain_exit(to_dmar_domain(domain));
38717946 4723}
38717946 4724
4c5478c9
JR
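/*
 * Attach a device to an externally managed (VM) domain: RMRR-locked
 * devices are refused, any previous domain is torn down, and the domain's
 * address width/page-table depth is trimmed to what this IOMMU supports
 * before domain_add_dev_info() sets up the context entry.
 */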
4725static int intel_iommu_attach_device(struct iommu_domain *domain,
4726 struct device *dev)
38717946 4727{
00a77deb 4728 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
4729 struct intel_iommu *iommu;
4730 int addr_width;
156baca8 4731 u8 bus, devfn;
faa3d6f5 4732
c875d2c1
AW
4733 if (device_is_rmrr_locked(dev)) {
4734 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4735 return -EPERM;
4736 }
4737
7207d8f9
DW
4738 /* normally dev is not mapped */
4739 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4740 struct dmar_domain *old_domain;
4741
1525a29a 4742 old_domain = find_domain(dev);
faa3d6f5 4743 if (old_domain) {
d160aca5 4744 rcu_read_lock();
de7e8886 4745 dmar_remove_one_dev_info(old_domain, dev);
d160aca5 4746 rcu_read_unlock();
62c22167
JR
4747
4748 if (!domain_type_is_vm_or_si(old_domain) &&
4749 list_empty(&old_domain->devices))
4750 domain_exit(old_domain);
faa3d6f5
WH
4751 }
4752 }
4753
156baca8 4754 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4755 if (!iommu)
4756 return -ENODEV;
4757
4758 /* check if this iommu agaw is sufficient for max mapped address */
4759 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4760 if (addr_width > cap_mgaw(iommu->cap))
4761 addr_width = cap_mgaw(iommu->cap);
4762
4763 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 4764 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4765 "sufficient for the mapped address (%llx)\n",
a99c47a2 4766 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4767 return -EFAULT;
4768 }
a99c47a2
TL
4769 dmar_domain->gaw = addr_width;
4770
4771 /*
4772 * Knock out extra levels of page tables if necessary
4773 */
4774 while (iommu->agaw < dmar_domain->agaw) {
4775 struct dma_pte *pte;
4776
4777 pte = dmar_domain->pgd;
4778 if (dma_pte_present(pte)) {
25cbff16
SY
4779 dmar_domain->pgd = (struct dma_pte *)
4780 phys_to_virt(dma_pte_addr(pte));
7a661013 4781 free_pgtable_page(pte);
a99c47a2
TL
4782 }
4783 dmar_domain->agaw--;
4784 }
fe40f1e0 4785
28ccce0d 4786 return domain_add_dev_info(dmar_domain, dev);
38717946 4787}
38717946 4788
4c5478c9
JR
4789static void intel_iommu_detach_device(struct iommu_domain *domain,
4790 struct device *dev)
38717946 4791{
e6de0f8d 4792 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 4793}
c7151a8d 4794
b146a1c9
JR
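/*
 * iommu_ops map callback: translate IOMMU_READ/WRITE/CACHE into DMA PTE
 * bits (snooping only if the domain supports it), extend the domain's
 * max_addr if needed, and map whole pages covering [hpa, hpa + size).
 */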
4795static int intel_iommu_map(struct iommu_domain *domain,
4796 unsigned long iova, phys_addr_t hpa,
5009065d 4797 size_t size, int iommu_prot)
faa3d6f5 4798{
00a77deb 4799 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 4800 u64 max_addr;
dde57a21 4801 int prot = 0;
faa3d6f5 4802 int ret;
fe40f1e0 4803
dde57a21
JR
4804 if (iommu_prot & IOMMU_READ)
4805 prot |= DMA_PTE_READ;
4806 if (iommu_prot & IOMMU_WRITE)
4807 prot |= DMA_PTE_WRITE;
9cf06697
SY
4808 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4809 prot |= DMA_PTE_SNP;
dde57a21 4810
163cc52c 4811 max_addr = iova + size;
dde57a21 4812 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4813 u64 end;
4814
4815 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4816 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4817 if (end < max_addr) {
9f10e5bf 4818 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4819 "sufficient for the mapped address (%llx)\n",
8954da1f 4820 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4821 return -EFAULT;
4822 }
dde57a21 4823 dmar_domain->max_addr = max_addr;
fe40f1e0 4824 }
ad051221
DW
4825 /* Round up size to next multiple of PAGE_SIZE, if it and
4826 the low bits of hpa would take us onto the next page */
88cb6a74 4827 size = aligned_nrpages(hpa, size);
ad051221
DW
4828 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4829 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4830 return ret;
38717946 4831}
38717946 4832
5009065d 4833static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4834 unsigned long iova, size_t size)
38717946 4835{
00a77deb 4836 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460
DW
4837 struct page *freelist = NULL;
4838 struct intel_iommu *iommu;
4839 unsigned long start_pfn, last_pfn;
4840 unsigned int npages;
42e8c186 4841 int iommu_id, level = 0;
5cf0a76f
DW
4842
4843 /* Cope with horrid API which requires us to unmap more than the
4844 size argument if it happens to be a large-page mapping. */
dc02e46e 4845 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5cf0a76f
DW
4846
4847 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4848 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4849
ea8ea460
DW
4850 start_pfn = iova >> VTD_PAGE_SHIFT;
4851 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4852
4853 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4854
4855 npages = last_pfn - start_pfn + 1;
4856
29a27719 4857 for_each_domain_iommu(iommu_id, dmar_domain) {
a1ddcbe9 4858 iommu = g_iommus[iommu_id];
ea8ea460 4859
42e8c186
JR
4860 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
4861 start_pfn, npages, !freelist, 0);
ea8ea460
DW
4862 }
4863
4864 dma_free_pagelist(freelist);
fe40f1e0 4865
163cc52c
DW
4866 if (dmar_domain->max_addr == iova + size)
4867 dmar_domain->max_addr = iova;
b146a1c9 4868
5cf0a76f 4869 return size;
38717946 4870}
38717946 4871
d14d6577 4872static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4873 dma_addr_t iova)
38717946 4874{
00a77deb 4875 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 4876 struct dma_pte *pte;
5cf0a76f 4877 int level = 0;
faa3d6f5 4878 u64 phys = 0;
38717946 4879
5cf0a76f 4880 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4881 if (pte)
faa3d6f5 4882 phys = dma_pte_addr(pte);
38717946 4883
faa3d6f5 4884 return phys;
38717946 4885}
a8bcbb0d 4886
5d587b8d 4887static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 4888{
dbb9fd86 4889 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 4890 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 4891 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 4892 return irq_remapping_enabled == 1;
dbb9fd86 4893
5d587b8d 4894 return false;
dbb9fd86
SY
4895}
4896
abdfdde2
AW
4897static int intel_iommu_add_device(struct device *dev)
4898{
a5459cfe 4899 struct intel_iommu *iommu;
abdfdde2 4900 struct iommu_group *group;
156baca8 4901 u8 bus, devfn;
70ae6f0d 4902
a5459cfe
AW
4903 iommu = device_to_iommu(dev, &bus, &devfn);
4904 if (!iommu)
70ae6f0d
AW
4905 return -ENODEV;
4906
a5459cfe 4907 iommu_device_link(iommu->iommu_dev, dev);
a4ff1fc2 4908
e17f9ff4 4909 group = iommu_group_get_for_dev(dev);
783f157b 4910
e17f9ff4
AW
4911 if (IS_ERR(group))
4912 return PTR_ERR(group);
bcb71abe 4913
abdfdde2 4914 iommu_group_put(group);
e17f9ff4 4915 return 0;
abdfdde2 4916}
70ae6f0d 4917
abdfdde2
AW
4918static void intel_iommu_remove_device(struct device *dev)
4919{
a5459cfe
AW
4920 struct intel_iommu *iommu;
4921 u8 bus, devfn;
4922
4923 iommu = device_to_iommu(dev, &bus, &devfn);
4924 if (!iommu)
4925 return;
4926
abdfdde2 4927 iommu_group_remove_device(dev);
a5459cfe
AW
4928
4929 iommu_device_unlink(iommu->iommu_dev, dev);
70ae6f0d
AW
4930}
4931
b22f6434 4932static const struct iommu_ops intel_iommu_ops = {
5d587b8d 4933 .capable = intel_iommu_capable,
00a77deb
JR
4934 .domain_alloc = intel_iommu_domain_alloc,
4935 .domain_free = intel_iommu_domain_free,
a8bcbb0d
JR
4936 .attach_dev = intel_iommu_attach_device,
4937 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4938 .map = intel_iommu_map,
4939 .unmap = intel_iommu_unmap,
315786eb 4940 .map_sg = default_iommu_map_sg,
a8bcbb0d 4941 .iova_to_phys = intel_iommu_iova_to_phys,
abdfdde2
AW
4942 .add_device = intel_iommu_add_device,
4943 .remove_device = intel_iommu_remove_device,
6d1c56a9 4944 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4945};
9af88143 4946
9452618e
DV
4947static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4948{
4949 /* G4x/GM45 integrated gfx dmar support is totally busted. */
9f10e5bf 4950 pr_info("Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
4951 dmar_map_gfx = 0;
4952}
4953
4954DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4955DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4956DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4957DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4958DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4959DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4960DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4961
d34d6517 4962static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4963{
4964 /*
4965 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4966 * but needs it. Same seems to hold for the desktop versions.
9af88143 4967 */
9f10e5bf 4968 pr_info("Forcing write-buffer flush capability\n");
9af88143
DW
4969 rwbf_quirk = 1;
4970}
4971
4972DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4973DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4974DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4975DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4976DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4977DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4978DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4979
eecfd57f
AJ
4980#define GGC 0x52
4981#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4982#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4983#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4984#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4985#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4986#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4987#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4988#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4989
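/*
 * Calpella/Ironlake integrated graphics: if the BIOS allocated no stolen
 * memory for a shadow GTT, translation for graphics is disabled entirely;
 * otherwise strict (unbatched) IOTLB flushing is forced so that, per the
 * comment below, the gfx device can be idle before mappings are flushed.
 */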
d34d6517 4990static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4991{
4992 unsigned short ggc;
4993
eecfd57f 4994 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4995 return;
4996
eecfd57f 4997 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 4998 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 4999 dmar_map_gfx = 0;
6fbcfb3e
DW
5000 } else if (dmar_map_gfx) {
5001 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 5002 pr_info("Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
5003 intel_iommu_strict = 1;
5004 }
9eecabcb
DW
5005}
5006DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5007DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5008DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5009DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5010
e0fc7e0b
DW
5011/* On Tylersburg chipsets, some BIOSes have been known to enable the
5012 ISOCH DMAR unit for the Azalia sound device, but not give it any
5013 TLB entries, which causes it to deadlock. Check for that. We do
5014 this in a function called from init_dmars(), instead of in a PCI
5015 quirk, because we don't want to print the obnoxious "BIOS broken"
5016 message if VT-d is actually disabled.
5017*/
5018static void __init check_tylersburg_isoch(void)
5019{
5020 struct pci_dev *pdev;
5021 uint32_t vtisochctrl;
5022
5023 /* If there's no Azalia in the system anyway, forget it. */
5024 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5025 if (!pdev)
5026 return;
5027 pci_dev_put(pdev);
5028
5029 /* System Management Registers. Might be hidden, in which case
5030 we can't do the sanity check. But that's OK, because the
5031 known-broken BIOSes _don't_ actually hide it, so far. */
5032 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5033 if (!pdev)
5034 return;
5035
5036 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5037 pci_dev_put(pdev);
5038 return;
5039 }
5040
5041 pci_dev_put(pdev);
5042
5043 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5044 if (vtisochctrl & 1)
5045 return;
5046
5047 /* Drop all bits other than the number of TLB entries */
5048 vtisochctrl &= 0x1c;
5049
5050 /* If we have the recommended number of TLB entries (16), fine. */
5051 if (vtisochctrl == 0x10)
5052 return;
5053
5054 /* Zero TLB entries? You get to ride the short bus to school. */
5055 if (!vtisochctrl) {
5056 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5057 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5058 dmi_get_system_info(DMI_BIOS_VENDOR),
5059 dmi_get_system_info(DMI_BIOS_VERSION),
5060 dmi_get_system_info(DMI_PRODUCT_VERSION));
5061 iommu_identity_mapping |= IDENTMAP_AZALIA;
5062 return;
5063 }
9f10e5bf
JR
5064
5065 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
5066 vtisochctrl);
5067}