iommu/dma: Fix iova map result check bug
drivers/iommu/dma-iommu.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * A fairly generic DMA-API to IOMMU-API glue layer.
 *
 * Copyright (C) 2014-2015 ARM Ltd.
 *
 * based in part on arch/arm/mm/dma-mapping.c:
 * Copyright (C) 2000-2004 Russell King
 */

#include <linux/acpi_iort.h>
#include <linux/atomic.h>
#include <linux/crash_dump.h>
#include <linux/device.h>
#include <linux/dma-direct.h>
#include <linux/dma-iommu.h>
#include <linux/dma-map-ops.h>
#include <linux/gfp.h>
#include <linux/huge_mm.h>
#include <linux/iommu.h>
#include <linux/iova.h>
#include <linux/irq.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>

struct iommu_dma_msi_page {
	struct list_head	list;
	dma_addr_t		iova;
	phys_addr_t		phys;
};

enum iommu_dma_cookie_type {
	IOMMU_DMA_IOVA_COOKIE,
	IOMMU_DMA_MSI_COOKIE,
};

struct iommu_dma_cookie {
	enum iommu_dma_cookie_type	type;
	union {
		/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
		struct {
			struct iova_domain	iovad;

			struct iova_fq __percpu *fq;	/* Flush queue */
			/* Number of TLB flushes that have been started */
			atomic64_t		fq_flush_start_cnt;
			/* Number of TLB flushes that have been finished */
			atomic64_t		fq_flush_finish_cnt;
			/* Timer to regularly empty the flush queues */
			struct timer_list	fq_timer;
			/* 1 when timer is active, 0 when not */
			atomic_t		fq_timer_on;
		};
		/* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
		dma_addr_t		msi_iova;
	};
	struct list_head		msi_page_list;

	/* Domain for flush queue callback; NULL if flush queue not in use */
	struct iommu_domain		*fq_domain;
};

static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
bool iommu_dma_forcedac __read_mostly;

static int __init iommu_dma_forcedac_setup(char *str)
{
	int ret = kstrtobool(str, &iommu_dma_forcedac);

	if (!ret && iommu_dma_forcedac)
		pr_info("Forcing DAC for PCI devices\n");
	return ret;
}
early_param("iommu.forcedac", iommu_dma_forcedac_setup);

/* Number of entries per flush queue */
#define IOVA_FQ_SIZE	256

/* Timeout (in ms) after which entries are flushed from the queue */
#define IOVA_FQ_TIMEOUT	10

/* Flush queue entry for deferred flushing */
struct iova_fq_entry {
	unsigned long iova_pfn;
	unsigned long pages;
	struct list_head freelist;
	u64 counter; /* Flush counter when this entry was added */
};

/* Per-CPU flush queue structure */
struct iova_fq {
	struct iova_fq_entry entries[IOVA_FQ_SIZE];
	unsigned int head, tail;
	spinlock_t lock;
};

#define fq_ring_for_each(i, fq) \
	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)

static inline bool fq_full(struct iova_fq *fq)
{
	assert_spin_locked(&fq->lock);
	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
}

static inline unsigned int fq_ring_add(struct iova_fq *fq)
{
	unsigned int idx = fq->tail;

	assert_spin_locked(&fq->lock);

	fq->tail = (idx + 1) % IOVA_FQ_SIZE;

	return idx;
}

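/*
 * Reclaim flush-queue entries whose TLB flush has already completed,
 * returning their pages and IOVA ranges to the allocators. Caller must
 * hold fq->lock.
 */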
static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
{
	u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
	unsigned int idx;

	assert_spin_locked(&fq->lock);

	fq_ring_for_each(idx, fq) {

		if (fq->entries[idx].counter >= counter)
			break;

		put_pages_list(&fq->entries[idx].freelist);
		free_iova_fast(&cookie->iovad,
			       fq->entries[idx].iova_pfn,
			       fq->entries[idx].pages);

		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
	}
}

static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
{
	atomic64_inc(&cookie->fq_flush_start_cnt);
	cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain);
	atomic64_inc(&cookie->fq_flush_finish_cnt);
}

static void fq_flush_timeout(struct timer_list *t)
{
	struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer);
	int cpu;

	atomic_set(&cookie->fq_timer_on, 0);
	fq_flush_iotlb(cookie);

	for_each_possible_cpu(cpu) {
		unsigned long flags;
		struct iova_fq *fq;

		fq = per_cpu_ptr(cookie->fq, cpu);
		spin_lock_irqsave(&fq->lock, flags);
		fq_ring_free(cookie, fq);
		spin_unlock_irqrestore(&fq->lock, flags);
	}
}

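/*
 * Defer freeing of an IOVA range: stash it in this CPU's flush queue so it
 * is only returned to the allocator once a later IOTLB flush has made the
 * stale mapping unreachable, flushing eagerly if the queue is full.
 */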
static void queue_iova(struct iommu_dma_cookie *cookie,
		unsigned long pfn, unsigned long pages,
		struct list_head *freelist)
{
	struct iova_fq *fq;
	unsigned long flags;
	unsigned int idx;

	/*
	 * Order against the IOMMU driver's pagetable update from unmapping
	 * @pte, to guarantee that fq_flush_iotlb() observes that if called
	 * from a different CPU before we release the lock below. Full barrier
	 * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
	 * written fq state here.
	 */
	smp_mb();

	fq = raw_cpu_ptr(cookie->fq);
	spin_lock_irqsave(&fq->lock, flags);

	/*
	 * First remove all entries from the flush queue that have already been
	 * flushed out on another CPU. This makes the fq_full() check below less
	 * likely to be true.
	 */
	fq_ring_free(cookie, fq);

	if (fq_full(fq)) {
		fq_flush_iotlb(cookie);
		fq_ring_free(cookie, fq);
	}

	idx = fq_ring_add(fq);

	fq->entries[idx].iova_pfn = pfn;
	fq->entries[idx].pages    = pages;
	fq->entries[idx].counter  = atomic64_read(&cookie->fq_flush_start_cnt);
	list_splice(freelist, &fq->entries[idx].freelist);

	spin_unlock_irqrestore(&fq->lock, flags);

	/* Avoid false sharing as much as possible. */
	if (!atomic_read(&cookie->fq_timer_on) &&
	    !atomic_xchg(&cookie->fq_timer_on, 1))
		mod_timer(&cookie->fq_timer,
			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
}

static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
{
	int cpu, idx;

	if (!cookie->fq)
		return;

	del_timer_sync(&cookie->fq_timer);
	/* The IOVAs will be torn down separately, so just free our queued pages */
	for_each_possible_cpu(cpu) {
		struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);

		fq_ring_for_each(idx, fq)
			put_pages_list(&fq->entries[idx].freelist);
	}

	free_percpu(cookie->fq);
}

/* sysfs updates are serialised by the mutex of the group owning @domain */
int iommu_dma_init_fq(struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_fq __percpu *queue;
	int i, cpu;

	if (cookie->fq_domain)
		return 0;

	atomic64_set(&cookie->fq_flush_start_cnt,  0);
	atomic64_set(&cookie->fq_flush_finish_cnt, 0);

	queue = alloc_percpu(struct iova_fq);
	if (!queue) {
		pr_warn("iova flush queue initialization failed\n");
		return -ENOMEM;
	}

	for_each_possible_cpu(cpu) {
		struct iova_fq *fq = per_cpu_ptr(queue, cpu);

		fq->head = 0;
		fq->tail = 0;

		spin_lock_init(&fq->lock);

		for (i = 0; i < IOVA_FQ_SIZE; i++)
			INIT_LIST_HEAD(&fq->entries[i].freelist);
	}

	cookie->fq = queue;

	timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
	atomic_set(&cookie->fq_timer_on, 0);
	/*
	 * Prevent incomplete fq state being observable. Pairs with path from
	 * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
	 */
	smp_wmb();
	WRITE_ONCE(cookie->fq_domain, domain);
	return 0;
}

static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
{
	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
		return cookie->iovad.granule;
	return PAGE_SIZE;
}

static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
{
	struct iommu_dma_cookie *cookie;

	cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
	if (cookie) {
		INIT_LIST_HEAD(&cookie->msi_page_list);
		cookie->type = type;
	}
	return cookie;
}

/**
 * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
 * @domain: IOMMU domain to prepare for DMA-API usage
 */
int iommu_get_dma_cookie(struct iommu_domain *domain)
{
	if (domain->iova_cookie)
		return -EEXIST;

	domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
	if (!domain->iova_cookie)
		return -ENOMEM;

	return 0;
}

/**
 * iommu_get_msi_cookie - Acquire just MSI remapping resources
 * @domain: IOMMU domain to prepare
 * @base: Start address of IOVA region for MSI mappings
 *
 * Users who manage their own IOVA allocation and do not want DMA API support,
 * but would still like to take advantage of automatic MSI remapping, can use
 * this to initialise their own domain appropriately. Users should reserve a
 * contiguous IOVA region, starting at @base, large enough to accommodate the
 * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
 * used by the devices attached to @domain.
 */
int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
{
	struct iommu_dma_cookie *cookie;

	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
		return -EINVAL;

	if (domain->iova_cookie)
		return -EEXIST;

	cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
	if (!cookie)
		return -ENOMEM;

	cookie->msi_iova = base;
	domain->iova_cookie = cookie;
	return 0;
}
EXPORT_SYMBOL(iommu_get_msi_cookie);

/**
 * iommu_put_dma_cookie - Release a domain's DMA mapping resources
 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
 *          iommu_get_msi_cookie()
 */
void iommu_put_dma_cookie(struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iommu_dma_msi_page *msi, *tmp;

	if (!cookie)
		return;

	if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) {
		iommu_dma_free_fq(cookie);
		put_iova_domain(&cookie->iovad);
	}

	list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
		list_del(&msi->list);
		kfree(msi);
	}
	kfree(cookie);
	domain->iova_cookie = NULL;
}

/**
 * iommu_dma_get_resv_regions - Reserved region driver helper
 * @dev: Device from iommu_get_resv_regions()
 * @list: Reserved region list from iommu_get_resv_regions()
 *
 * IOMMU drivers can use this to implement their .get_resv_regions callback
 * for general non-IOMMU-specific reservations. Currently, this covers GICv3
 * ITS region reservation on ACPI based ARM platforms that may require HW MSI
 * reservation.
 */
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
{

	if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
		iort_iommu_msi_get_resv_regions(dev, list);

}
EXPORT_SYMBOL(iommu_dma_get_resv_regions);

static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
		phys_addr_t start, phys_addr_t end)
{
	struct iova_domain *iovad = &cookie->iovad;
	struct iommu_dma_msi_page *msi_page;
	int i, num_pages;

	start -= iova_offset(iovad, start);
	num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);

	for (i = 0; i < num_pages; i++) {
		msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
		if (!msi_page)
			return -ENOMEM;

		msi_page->phys = start;
		msi_page->iova = start;
		INIT_LIST_HEAD(&msi_page->list);
		list_add(&msi_page->list, &cookie->msi_page_list);
		start += iovad->granule;
	}

	return 0;
}

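/*
 * Reserve the host bridge's MMIO windows (so IOVAs never alias peer device
 * addresses) and any gaps between its inbound DMA ranges (addresses the
 * bridge cannot forward), so they are never handed out as IOVAs.
 */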
static int iova_reserve_pci_windows(struct pci_dev *dev,
		struct iova_domain *iovad)
{
	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
	struct resource_entry *window;
	unsigned long lo, hi;
	phys_addr_t start = 0, end;

	resource_list_for_each_entry(window, &bridge->windows) {
		if (resource_type(window->res) != IORESOURCE_MEM)
			continue;

		lo = iova_pfn(iovad, window->res->start - window->offset);
		hi = iova_pfn(iovad, window->res->end - window->offset);
		reserve_iova(iovad, lo, hi);
	}

	/* Get reserved DMA windows from host bridge */
	resource_list_for_each_entry(window, &bridge->dma_ranges) {
		end = window->res->start - window->offset;
resv_iova:
		if (end > start) {
			lo = iova_pfn(iovad, start);
			hi = iova_pfn(iovad, end);
			reserve_iova(iovad, lo, hi);
		} else if (end < start) {
			/* dma_ranges list should be sorted */
			dev_err(&dev->dev,
				"Failed to reserve IOVA [%pa-%pa]\n",
				&start, &end);
			return -EINVAL;
		}

		start = window->res->end - window->offset + 1;
		/* If window is last entry */
		if (window->node.next == &bridge->dma_ranges &&
		    end != ~(phys_addr_t)0) {
			end = ~(phys_addr_t)0;
			goto resv_iova;
		}
	}

	return 0;
}

static int iova_reserve_iommu_regions(struct device *dev,
		struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	struct iommu_resv_region *region;
	LIST_HEAD(resv_regions);
	int ret = 0;

	if (dev_is_pci(dev)) {
		ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
		if (ret)
			return ret;
	}

	iommu_get_resv_regions(dev, &resv_regions);
	list_for_each_entry(region, &resv_regions, list) {
		unsigned long lo, hi;

		/* We ARE the software that manages these! */
		if (region->type == IOMMU_RESV_SW_MSI)
			continue;

		lo = iova_pfn(iovad, region->start);
		hi = iova_pfn(iovad, region->start + region->length - 1);
		reserve_iova(iovad, lo, hi);

		if (region->type == IOMMU_RESV_MSI)
			ret = cookie_init_hw_msi_region(cookie, region->start,
					region->start + region->length);
		if (ret)
			break;
	}
	iommu_put_resv_regions(dev, &resv_regions);

	return ret;
}

static bool dev_is_untrusted(struct device *dev)
{
	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
}

static bool dev_use_swiotlb(struct device *dev)
{
	return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
}

/**
 * iommu_dma_init_domain - Initialise a DMA mapping domain
 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
 * @base: IOVA at which the mappable address space starts
 * @limit: Last address of the IOVA space
 * @dev: Device the domain is being initialised for
 *
 * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
 * avoid rounding surprises. If necessary, we reserve the page at address 0
 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
 * any change which could make prior IOVAs invalid will fail.
 */
static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
				 dma_addr_t limit, struct device *dev)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	unsigned long order, base_pfn;
	struct iova_domain *iovad;
	int ret;

	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
		return -EINVAL;

	iovad = &cookie->iovad;

	/* Use the smallest supported page size for IOVA granularity */
	order = __ffs(domain->pgsize_bitmap);
	base_pfn = max_t(unsigned long, 1, base >> order);

	/* Check the domain allows at least some access to the device... */
	if (domain->geometry.force_aperture) {
		if (base > domain->geometry.aperture_end ||
		    limit < domain->geometry.aperture_start) {
			pr_warn("specified DMA range outside IOMMU capability\n");
			return -EFAULT;
		}
		/* ...then finally give it a kicking to make sure it fits */
		base_pfn = max_t(unsigned long, base_pfn,
				 domain->geometry.aperture_start >> order);
	}

	/* start_pfn is always nonzero for an already-initialised domain */
	if (iovad->start_pfn) {
		if (1UL << order != iovad->granule ||
		    base_pfn != iovad->start_pfn) {
			pr_warn("Incompatible range for DMA domain\n");
			return -EFAULT;
		}

		return 0;
	}

	init_iova_domain(iovad, 1UL << order, base_pfn);
	ret = iova_domain_init_rcaches(iovad);
	if (ret)
		return ret;

	/* If the FQ fails we can simply fall back to strict mode */
	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
		domain->type = IOMMU_DOMAIN_DMA;

	return iova_reserve_iommu_regions(dev, domain);
}

/**
 * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
 *                    page flags.
 * @dir: Direction of DMA transfer
 * @coherent: Is the DMA master cache-coherent?
 * @attrs: DMA attributes for the mapping
 *
 * Return: corresponding IOMMU API page protection flags
 */
static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
			    unsigned long attrs)
{
	int prot = coherent ? IOMMU_CACHE : 0;

	if (attrs & DMA_ATTR_PRIVILEGED)
		prot |= IOMMU_PRIV;

	switch (dir) {
	case DMA_BIDIRECTIONAL:
		return prot | IOMMU_READ | IOMMU_WRITE;
	case DMA_TO_DEVICE:
		return prot | IOMMU_READ;
	case DMA_FROM_DEVICE:
		return prot | IOMMU_WRITE;
	default:
		return 0;
	}
}

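/*
 * Allocate an IOVA range of @size bytes below @dma_limit. For MSI cookies
 * this is a trivial linear allocation; otherwise PCI devices are first
 * offered a 32-bit (SAC) address unless iommu.forcedac overrides that.
 */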
static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
		size_t size, u64 dma_limit, struct device *dev)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	unsigned long shift, iova_len, iova = 0;

	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
		cookie->msi_iova += size;
		return cookie->msi_iova - size;
	}

	shift = iova_shift(iovad);
	iova_len = size >> shift;

	dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);

	if (domain->geometry.force_aperture)
		dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);

	/* Try to get PCI devices a SAC address */
	if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev))
		iova = alloc_iova_fast(iovad, iova_len,
				       DMA_BIT_MASK(32) >> shift, false);

	if (!iova)
		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
				       true);

	return (dma_addr_t)iova << shift;
}

static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
		dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
{
	struct iova_domain *iovad = &cookie->iovad;

	/* The MSI case is only ever cleaning up its most recent allocation */
	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
		cookie->msi_iova -= size;
	else if (gather && gather->queued)
		queue_iova(cookie, iova_pfn(iovad, iova),
				size >> iova_shift(iovad),
				&gather->freelist);
	else
		free_iova_fast(iovad, iova_pfn(iovad, iova),
				size >> iova_shift(iovad));
}

static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
		size_t size)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	size_t iova_off = iova_offset(iovad, dma_addr);
	struct iommu_iotlb_gather iotlb_gather;
	size_t unmapped;

	dma_addr -= iova_off;
	size = iova_align(iovad, size + iova_off);
	iommu_iotlb_gather_init(&iotlb_gather);
	iotlb_gather.queued = READ_ONCE(cookie->fq_domain);

	unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
	WARN_ON(unmapped != size);

	if (!iotlb_gather.queued)
		iommu_iotlb_sync(domain, &iotlb_gather);
	iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
}

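/*
 * Common helper to map a physical range: allocate a suitably aligned IOVA
 * and create the IOMMU mapping, returning DMA_MAPPING_ERROR on failure.
 */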
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
		size_t size, int prot, u64 dma_mask)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	size_t iova_off = iova_offset(iovad, phys);
	dma_addr_t iova;

	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
	    iommu_deferred_attach(dev, domain))
		return DMA_MAPPING_ERROR;

	size = iova_align(iovad, size + iova_off);

	iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
	if (!iova)
		return DMA_MAPPING_ERROR;

	if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
		iommu_dma_free_iova(cookie, iova, size, NULL);
		return DMA_MAPPING_ERROR;
	}
	return iova + iova_off;
}

static void __iommu_dma_free_pages(struct page **pages, int count)
{
	while (count--)
		__free_page(pages[count]);
	kvfree(pages);
}

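/*
 * Allocate @count pages for a non-contiguous buffer, opportunistically using
 * the higher orders permitted by @order_mask and splitting them so the array
 * always holds order-0 pages.
 */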
static struct page **__iommu_dma_alloc_pages(struct device *dev,
		unsigned int count, unsigned long order_mask, gfp_t gfp)
{
	struct page **pages;
	unsigned int i = 0, nid = dev_to_node(dev);

	order_mask &= (2U << MAX_ORDER) - 1;
	if (!order_mask)
		return NULL;

	pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return NULL;

	/* IOMMU can map any pages, so highmem can also be used here */
	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;

	/* It makes no sense to muck about with huge pages */
	gfp &= ~__GFP_COMP;

	while (count) {
		struct page *page = NULL;
		unsigned int order_size;

		/*
		 * Higher-order allocations are a convenience rather
		 * than a necessity, hence using __GFP_NORETRY until
		 * falling back to minimum-order allocations.
		 */
		for (order_mask &= (2U << __fls(count)) - 1;
		     order_mask; order_mask &= ~order_size) {
			unsigned int order = __fls(order_mask);
			gfp_t alloc_flags = gfp;

			order_size = 1U << order;
			if (order_mask > order_size)
				alloc_flags |= __GFP_NORETRY;
			page = alloc_pages_node(nid, alloc_flags, order);
			if (!page)
				continue;
			if (order)
				split_page(page, order);
			break;
		}
		if (!page) {
			__iommu_dma_free_pages(pages, i);
			return NULL;
		}
		count -= order_size;
		while (order_size--)
			pages[i++] = page++;
	}
	return pages;
}

/*
 * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
 * but an IOMMU which supports smaller pages might not map the whole thing.
 */
static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
		size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot,
		unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	bool coherent = dev_is_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
	struct page **pages;
	dma_addr_t iova;
	ssize_t ret;

	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
	    iommu_deferred_attach(dev, domain))
		return NULL;

	min_size = alloc_sizes & -alloc_sizes;
	if (min_size < PAGE_SIZE) {
		min_size = PAGE_SIZE;
		alloc_sizes |= PAGE_SIZE;
	} else {
		size = ALIGN(size, min_size);
	}
	if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
		alloc_sizes = min_size;

	count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
					gfp);
	if (!pages)
		return NULL;

	size = iova_align(iovad, size);
	iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
	if (!iova)
		goto out_free_pages;

	if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
		goto out_free_iova;

	if (!(ioprot & IOMMU_CACHE)) {
		struct scatterlist *sg;
		int i;

		for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
			arch_dma_prep_coherent(sg_page(sg), sg->length);
	}

	ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
	if (ret < 0 || ret < size)
		goto out_free_sg;

	sgt->sgl->dma_address = iova;
	sgt->sgl->dma_length = size;
	return pages;

out_free_sg:
	sg_free_table(sgt);
out_free_iova:
	iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_pages:
	__iommu_dma_free_pages(pages, count);
	return NULL;
}

static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
		unsigned long attrs)
{
	struct page **pages;
	struct sg_table sgt;
	void *vaddr;

	pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot,
						attrs);
	if (!pages)
		return NULL;
	*dma_handle = sgt.sgl->dma_address;
	sg_free_table(&sgt);
	vaddr = dma_common_pages_remap(pages, size, prot,
			__builtin_return_address(0));
	if (!vaddr)
		goto out_unmap;
	return vaddr;

out_unmap:
	__iommu_dma_unmap(dev, *dma_handle, size);
	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
	return NULL;
}

static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
		size_t size, enum dma_data_direction dir, gfp_t gfp,
		unsigned long attrs)
{
	struct dma_sgt_handle *sh;

	sh = kmalloc(sizeof(*sh), gfp);
	if (!sh)
		return NULL;

	sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp,
						    PAGE_KERNEL, attrs);
	if (!sh->pages) {
		kfree(sh);
		return NULL;
	}
	return &sh->sgt;
}

static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
		struct sg_table *sgt, enum dma_data_direction dir)
{
	struct dma_sgt_handle *sh = sgt_handle(sgt);

	__iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
	__iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
	sg_free_table(&sh->sgt);
	kfree(sh);
}

static void iommu_dma_sync_single_for_cpu(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(phys, size, dir);

	if (is_swiotlb_buffer(dev, phys))
		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
}

static void iommu_dma_sync_single_for_device(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
	if (is_swiotlb_buffer(dev, phys))
		swiotlb_sync_single_for_device(dev, phys, size, dir);

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_device(phys, size, dir);
}

static void iommu_dma_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (dev_use_swiotlb(dev))
		for_each_sg(sgl, sg, nelems, i)
			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
						      sg->length, dir);
	else if (!dev_is_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
}

static void iommu_dma_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (dev_use_swiotlb(dev))
		for_each_sg(sgl, sg, nelems, i)
			iommu_dma_sync_single_for_device(dev,
							 sg_dma_address(sg),
							 sg->length, dir);
	else if (!dev_is_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
}

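/*
 * Map a single page for streaming DMA, bouncing it through SWIOTLB first if
 * the device is untrusted and the buffer is not IOVA-granule aligned.
 */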
static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	bool coherent = dev_is_dma_coherent(dev);
	int prot = dma_info_to_prot(dir, coherent, attrs);
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	dma_addr_t iova, dma_mask = dma_get_mask(dev);

	/*
	 * If both the physical buffer start address and size are
	 * page aligned, we don't need to use a bounce page.
	 */
	if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
		void *padding_start;
		size_t padding_size, aligned_size;

		if (!is_swiotlb_active(dev)) {
			dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
			return DMA_MAPPING_ERROR;
		}

		aligned_size = iova_align(iovad, size);
		phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
					      iova_mask(iovad), dir, attrs);

		if (phys == DMA_MAPPING_ERROR)
			return DMA_MAPPING_ERROR;

		/* Cleanup the padding area. */
		padding_start = phys_to_virt(phys);
		padding_size = aligned_size;

		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
		    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
			padding_start += size;
			padding_size -= size;
		}

		memset(padding_start, 0, padding_size);
	}

	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(phys, size, dir);

	iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
	return iova;
}

static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	phys_addr_t phys;

	phys = iommu_iova_to_phys(domain, dma_handle);
	if (WARN_ON(!phys))
		return;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(phys, size, dir);

	__iommu_dma_unmap(dev, dma_handle, size);

	if (unlikely(is_swiotlb_buffer(dev, phys)))
		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
}

/*
 * Prepare a successfully-mapped scatterlist to give back to the caller.
 *
 * At this point the segments are already laid out by iommu_dma_map_sg() to
 * avoid individually crossing any boundaries, so we merely need to check a
 * segment's start address to avoid concatenating across one.
 */
static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
		dma_addr_t dma_addr)
{
	struct scatterlist *s, *cur = sg;
	unsigned long seg_mask = dma_get_seg_boundary(dev);
	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
	int i, count = 0;

	for_each_sg(sg, s, nents, i) {
		/* Restore this segment's original unaligned fields first */
		unsigned int s_iova_off = sg_dma_address(s);
		unsigned int s_length = sg_dma_len(s);
		unsigned int s_iova_len = s->length;

		s->offset += s_iova_off;
		s->length = s_length;
		sg_dma_address(s) = DMA_MAPPING_ERROR;
		sg_dma_len(s) = 0;

		/*
		 * Now fill in the real DMA data. If...
		 * - there is a valid output segment to append to
		 * - and this segment starts on an IOVA page boundary
		 * - but doesn't fall at a segment boundary
		 * - and wouldn't make the resulting output segment too long
		 */
		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
		    (max_len - cur_len >= s_length)) {
			/* ...then concatenate it with the previous one */
			cur_len += s_length;
		} else {
			/* Otherwise start the next output segment */
			if (i > 0)
				cur = sg_next(cur);
			cur_len = s_length;
			count++;

			sg_dma_address(cur) = dma_addr + s_iova_off;
		}

		sg_dma_len(cur) = cur_len;
		dma_addr += s_iova_len;

		if (s_length + s_iova_off < s_iova_len)
			cur_len = 0;
	}
	return count;
}

/*
 * If mapping failed, then just restore the original list,
 * but making sure the DMA fields are invalidated.
 */
static void __invalidate_sg(struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		if (sg_dma_address(s) != DMA_MAPPING_ERROR)
			s->offset += sg_dma_address(s);
		if (sg_dma_len(s))
			s->length = sg_dma_len(s);
		sg_dma_address(s) = DMA_MAPPING_ERROR;
		sg_dma_len(s) = 0;
	}
}

static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i)
		iommu_dma_unmap_page(dev, sg_dma_address(s),
				sg_dma_len(s), dir, attrs);
}

static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
				s->offset, s->length, dir, attrs);
		if (sg_dma_address(s) == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(s) = s->length;
	}

	return nents;

out_unmap:
	iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	return -EIO;
}

/*
 * The DMA API client is passing in a scatterlist which could describe
 * any old buffer layout, but the IOMMU API requires everything to be
 * aligned to IOMMU pages. Hence the need for this complicated bit of
 * impedance-matching, to be able to hand off a suitably-aligned list,
 * but still preserve the original offsets and sizes for the caller.
 */
static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	struct scatterlist *s, *prev = NULL;
	int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
	dma_addr_t iova;
	size_t iova_len = 0;
	unsigned long mask = dma_get_seg_boundary(dev);
	ssize_t ret;
	int i;

	if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
		ret = iommu_deferred_attach(dev, domain);
		if (ret)
			goto out;
	}

	if (dev_use_swiotlb(dev))
		return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);

	/*
	 * Work out how much IOVA space we need, and align the segments to
	 * IOVA granules for the IOMMU driver to handle. With some clever
	 * trickery we can modify the list in-place, but reversibly, by
	 * stashing the unaligned parts in the as-yet-unused DMA fields.
	 */
	for_each_sg(sg, s, nents, i) {
		size_t s_iova_off = iova_offset(iovad, s->offset);
		size_t s_length = s->length;
		size_t pad_len = (mask - iova_len + 1) & mask;

		sg_dma_address(s) = s_iova_off;
		sg_dma_len(s) = s_length;
		s->offset -= s_iova_off;
		s_length = iova_align(iovad, s_length + s_iova_off);
		s->length = s_length;

		/*
		 * Due to the alignment of our single IOVA allocation, we can
		 * depend on these assumptions about the segment boundary mask:
		 * - If mask size >= IOVA size, then the IOVA range cannot
		 *   possibly fall across a boundary, so we don't care.
		 * - If mask size < IOVA size, then the IOVA range must start
		 *   exactly on a boundary, therefore we can lay things out
		 *   based purely on segment lengths without needing to know
		 *   the actual addresses beforehand.
		 * - The mask must be a power of 2, so pad_len == 0 if
		 *   iova_len == 0, thus we cannot dereference prev the first
		 *   time through here (i.e. before it has a meaningful value).
		 */
		if (pad_len && pad_len < s_length - 1) {
			prev->length += pad_len;
			iova_len += pad_len;
		}

		iova_len += s_length;
		prev = s;
	}

	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
	if (!iova) {
		ret = -ENOMEM;
		goto out_restore_sg;
	}

	/*
	 * We'll leave any physical concatenation to the IOMMU driver's
	 * implementation - it knows better than we do.
	 */
	ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
	if (ret < 0 || ret < iova_len)
		goto out_free_iova;

	return __finalise_sg(dev, sg, nents, iova);

out_free_iova:
	iommu_dma_free_iova(cookie, iova, iova_len, NULL);
out_restore_sg:
	__invalidate_sg(sg, nents);
out:
	if (ret != -ENOMEM)
		return -EINVAL;
	return ret;
}

static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t start, end;
	struct scatterlist *tmp;
	int i;

	if (dev_use_swiotlb(dev)) {
		iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
		return;
	}

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);

	/*
	 * The scatterlist segments are mapped into a single
	 * contiguous IOVA allocation, so this is incredibly easy.
	 */
	start = sg_dma_address(sg);
	for_each_sg(sg_next(sg), tmp, nents - 1, i) {
		if (sg_dma_len(tmp) == 0)
			break;
		sg = tmp;
	}
	end = sg_dma_address(sg) + sg_dma_len(sg);
	__iommu_dma_unmap(dev, start, end - start);
}

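/*
 * Map device MMIO space (e.g. for peer-to-peer transfers): no struct page
 * and no cache maintenance, just an IOMMU_MMIO mapping of the physical
 * address.
 */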
static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	return __iommu_dma_map(dev, phys, size,
			dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
			dma_get_mask(dev));
}

static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	__iommu_dma_unmap(dev, handle, size);
}

static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
{
	size_t alloc_size = PAGE_ALIGN(size);
	int count = alloc_size >> PAGE_SHIFT;
	struct page *page = NULL, **pages = NULL;

	/* Non-coherent atomic allocation? Easy */
	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
	    dma_free_from_pool(dev, cpu_addr, alloc_size))
		return;

	if (is_vmalloc_addr(cpu_addr)) {
		/*
		 * If the address is remapped, then it's either non-coherent
		 * or highmem CMA, or an iommu_dma_alloc_remap() construction.
		 */
		pages = dma_common_find_pages(cpu_addr);
		if (!pages)
			page = vmalloc_to_page(cpu_addr);
		dma_common_free_remap(cpu_addr, alloc_size);
	} else {
		/* Lowmem means a coherent atomic or CMA allocation */
		page = virt_to_page(cpu_addr);
	}

	if (pages)
		__iommu_dma_free_pages(pages, count);
	if (page)
		dma_free_contiguous(dev, page, alloc_size);
}

static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t handle, unsigned long attrs)
{
	__iommu_dma_unmap(dev, handle, size);
	__iommu_dma_free(dev, size, cpu_addr);
}

static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
		struct page **pagep, gfp_t gfp, unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	size_t alloc_size = PAGE_ALIGN(size);
	int node = dev_to_node(dev);
	struct page *page = NULL;
	void *cpu_addr;

	page = dma_alloc_contiguous(dev, alloc_size, gfp);
	if (!page)
		page = alloc_pages_node(node, gfp, get_order(alloc_size));
	if (!page)
		return NULL;

	if (!coherent || PageHighMem(page)) {
		pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);

		cpu_addr = dma_common_contiguous_remap(page, alloc_size,
				prot, __builtin_return_address(0));
		if (!cpu_addr)
			goto out_free_pages;

		if (!coherent)
			arch_dma_prep_coherent(page, size);
	} else {
		cpu_addr = page_address(page);
	}

	*pagep = page;
	memset(cpu_addr, 0, alloc_size);
	return cpu_addr;
out_free_pages:
	dma_free_contiguous(dev, page, alloc_size);
	return NULL;
}

static void *iommu_dma_alloc(struct device *dev, size_t size,
		dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	struct page *page = NULL;
	void *cpu_addr;

	gfp |= __GFP_ZERO;

	if (gfpflags_allow_blocking(gfp) &&
	    !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
		return iommu_dma_alloc_remap(dev, size, handle, gfp,
				dma_pgprot(dev, PAGE_KERNEL, attrs), attrs);
	}

	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
	    !gfpflags_allow_blocking(gfp) && !coherent)
		page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
					   gfp, NULL);
	else
		cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
	if (!cpu_addr)
		return NULL;

	*handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
			dev->coherent_dma_mask);
	if (*handle == DMA_MAPPING_ERROR) {
		__iommu_dma_free(dev, size, cpu_addr);
		return NULL;
	}

	return cpu_addr;
}

static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
		void *cpu_addr, dma_addr_t dma_addr, size_t size,
		unsigned long attrs)
{
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn, off = vma->vm_pgoff;
	int ret;

	vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);

	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
		return -ENXIO;

	if (is_vmalloc_addr(cpu_addr)) {
		struct page **pages = dma_common_find_pages(cpu_addr);

		if (pages)
			return vm_map_pages(vma, pages, nr_pages);
		pfn = vmalloc_to_pfn(cpu_addr);
	} else {
		pfn = page_to_pfn(virt_to_page(cpu_addr));
	}

	return remap_pfn_range(vma, vma->vm_start, pfn + off,
			       vma->vm_end - vma->vm_start,
			       vma->vm_page_prot);
}

static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
		void *cpu_addr, dma_addr_t dma_addr, size_t size,
		unsigned long attrs)
{
	struct page *page;
	int ret;

	if (is_vmalloc_addr(cpu_addr)) {
		struct page **pages = dma_common_find_pages(cpu_addr);

		if (pages) {
			return sg_alloc_table_from_pages(sgt, pages,
					PAGE_ALIGN(size) >> PAGE_SHIFT,
					0, size, GFP_KERNEL);
		}

		page = vmalloc_to_page(cpu_addr);
	} else {
		page = virt_to_page(cpu_addr);
	}

	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
	if (!ret)
		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
	return ret;
}

static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);

	return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
}

static const struct dma_map_ops iommu_dma_ops = {
	.alloc			= iommu_dma_alloc,
	.free			= iommu_dma_free,
	.alloc_pages		= dma_common_alloc_pages,
	.free_pages		= dma_common_free_pages,
	.alloc_noncontiguous	= iommu_dma_alloc_noncontiguous,
	.free_noncontiguous	= iommu_dma_free_noncontiguous,
	.mmap			= iommu_dma_mmap,
	.get_sgtable		= iommu_dma_get_sgtable,
	.map_page		= iommu_dma_map_page,
	.unmap_page		= iommu_dma_unmap_page,
	.map_sg			= iommu_dma_map_sg,
	.unmap_sg		= iommu_dma_unmap_sg,
	.sync_single_for_cpu	= iommu_dma_sync_single_for_cpu,
	.sync_single_for_device	= iommu_dma_sync_single_for_device,
	.sync_sg_for_cpu	= iommu_dma_sync_sg_for_cpu,
	.sync_sg_for_device	= iommu_dma_sync_sg_for_device,
	.map_resource		= iommu_dma_map_resource,
	.unmap_resource		= iommu_dma_unmap_resource,
	.get_merge_boundary	= iommu_dma_get_merge_boundary,
};

/*
 * The IOMMU core code allocates the default DMA domain, which the underlying
 * IOMMU driver needs to support via the dma-iommu layer.
 */
void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (!domain)
		goto out_err;

	/*
	 * The IOMMU core code allocates the default DMA domain, which the
	 * underlying IOMMU driver needs to support via the dma-iommu layer.
	 */
	if (iommu_is_dma_domain(domain)) {
		if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
			goto out_err;
		dev->dma_ops = &iommu_dma_ops;
	}

	return;
out_err:
	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
		dev_name(dev));
}
EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);

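/*
 * Find or create the MSI doorbell mapping for @msi_addr within @domain,
 * with the doorbell address rounded down to the cookie's granule size.
 */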
static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
		phys_addr_t msi_addr, struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iommu_dma_msi_page *msi_page;
	dma_addr_t iova;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
	size_t size = cookie_msi_granule(cookie);

	msi_addr &= ~(phys_addr_t)(size - 1);
	list_for_each_entry(msi_page, &cookie->msi_page_list, list)
		if (msi_page->phys == msi_addr)
			return msi_page;

	msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL);
	if (!msi_page)
		return NULL;

	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
	if (!iova)
		goto out_free_page;

	if (iommu_map(domain, iova, msi_addr, size, prot))
		goto out_free_iova;

	INIT_LIST_HEAD(&msi_page->list);
	msi_page->phys = msi_addr;
	msi_page->iova = iova;
	list_add(&msi_page->list, &cookie->msi_page_list);
	return msi_page;

out_free_iova:
	iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_page:
	kfree(msi_page);
	return NULL;
}

int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
{
	struct device *dev = msi_desc_to_dev(desc);
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
	struct iommu_dma_msi_page *msi_page;
	static DEFINE_MUTEX(msi_prepare_lock); /* see below */

	if (!domain || !domain->iova_cookie) {
		desc->iommu_cookie = NULL;
		return 0;
	}

	/*
	 * In fact the whole prepare operation should already be serialised by
	 * irq_domain_mutex further up the callchain, but that's pretty subtle
	 * on its own, so consider this locking as failsafe documentation...
	 */
	mutex_lock(&msi_prepare_lock);
	msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
	mutex_unlock(&msi_prepare_lock);

	msi_desc_set_iommu_cookie(desc, msi_page);

	if (!msi_page)
		return -ENOMEM;
	return 0;
}

void iommu_dma_compose_msi_msg(struct msi_desc *desc,
			       struct msi_msg *msg)
{
	struct device *dev = msi_desc_to_dev(desc);
	const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
	const struct iommu_dma_msi_page *msi_page;

	msi_page = msi_desc_get_iommu_cookie(desc);

	if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
		return;

	msg->address_hi = upper_32_bits(msi_page->iova);
	msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
	msg->address_lo += lower_32_bits(msi_page->iova);
}

static int iommu_dma_init(void)
{
	if (is_kdump_kernel())
		static_branch_enable(&iommu_deferred_attach_enabled);

	return iova_cache_get();
}
arch_initcall(iommu_dma_init);