drivers/xen/swiotlb-xen.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2010
 * by Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 *
 * This code provides an IOMMU for Xen PV guests with PCI passthrough.
 *
 * PV guests under Xen run on a non-contiguous memory architecture.
 *
 * When PCI pass-through is utilized, this necessitates an IOMMU for
 * translating bus (DMA) addresses to virtual addresses and vice-versa,
 * and also a mechanism to obtain contiguous pages for device driver
 * operations (say DMA operations).
 *
 * Specifically, under Xen the Linux idea of pages is an illusion.  It
 * assumes that pages start at zero and go up to the available memory.  To
 * help with that, the Linux Xen MMU provides a lookup mechanism to
 * translate the page frame numbers (PFN) to machine frame numbers (MFN)
 * and vice-versa.  The MFNs are the "real" frame numbers.  Furthermore
 * memory is not contiguous: the Xen hypervisor stitches memory for guests
 * from different pools, which means there is no guarantee that PFN==MFN
 * and PFN+1==MFN+1.  Lastly, with Xen 4.0, pages (in debug mode) are
 * allocated in descending order (high to low), meaning the guest might
 * never get any MFNs under the 4GB mark.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/memblock.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/export.h>

#include <xen/swiotlb-xen.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/hvc-console.h>

#include <asm/dma-mapping.h>
#include <asm/xen/page-coherent.h>

#include <trace/events/swiotlb.h>

#define MAX_DMA_BITS 32

/*
 * Helpers for quick lookup and translation between (pseudo-)physical,
 * Xen bus (machine), and device DMA addresses.
 */

static inline phys_addr_t xen_phys_to_bus(struct device *dev, phys_addr_t paddr)
{
	unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr));
	phys_addr_t baddr = (phys_addr_t)bfn << XEN_PAGE_SHIFT;

	baddr |= paddr & ~XEN_PAGE_MASK;
	return baddr;
}

static inline dma_addr_t xen_phys_to_dma(struct device *dev, phys_addr_t paddr)
{
	return phys_to_dma(dev, xen_phys_to_bus(dev, paddr));
}

static inline phys_addr_t xen_bus_to_phys(struct device *dev,
					  phys_addr_t baddr)
{
	unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr));
	phys_addr_t paddr = (xen_pfn << XEN_PAGE_SHIFT) |
			    (baddr & ~XEN_PAGE_MASK);

	return paddr;
}

static inline phys_addr_t xen_dma_to_phys(struct device *dev,
					  dma_addr_t dma_addr)
{
	return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
}

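/*
 * Check whether the machine frames backing [p, p + size) are contiguous.
 * Returns 1 if the range spans machine frames that are not adjacent and
 * therefore cannot be handed to a device as a single DMA region.
 */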
static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
{
	unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p);
	unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size);

	next_bfn = pfn_to_bfn(xen_pfn);

	for (i = 1; i < nr_pages; i++)
		if (pfn_to_bfn(++xen_pfn) != ++next_bfn)
			return 1;

	return 0;
}

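/*
 * Check whether a DMA address lies within this domain's swiotlb bounce
 * buffer.  Addresses outside our domain are never treated as bounce
 * buffer addresses.
 */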
static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
{
	unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
	unsigned long xen_pfn = bfn_to_local_pfn(bfn);
	phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;

	/* If the address is outside our domain, it CAN
	 * have the same virtual address as another address
	 * in our domain. Therefore _only_ check address within our domain.
	 */
	if (pfn_valid(PFN_DOWN(paddr)))
		return is_swiotlb_buffer(dev, paddr);
	return 0;
}

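/*
 * Make the swiotlb bounce buffer usable under Xen: exchange its backing
 * pages with the hypervisor so that each IO_TLB_SEGSIZE-slab chunk is
 * machine-contiguous and addressable within dma_bits, widening dma_bits
 * up to MAX_DMA_BITS if a narrower exchange fails.
 */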
int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
{
	int rc;
	unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT);
	unsigned int i, dma_bits = order + PAGE_SHIFT;
	dma_addr_t dma_handle;
	phys_addr_t p = virt_to_phys(buf);

	BUILD_BUG_ON(IO_TLB_SEGSIZE & (IO_TLB_SEGSIZE - 1));
	BUG_ON(nslabs % IO_TLB_SEGSIZE);

	i = 0;
	do {
		do {
			rc = xen_create_contiguous_region(
				p + (i << IO_TLB_SHIFT), order,
				dma_bits, &dma_handle);
		} while (rc && dma_bits++ < MAX_DMA_BITS);
		if (rc)
			return rc;

		i += IO_TLB_SEGSIZE;
	} while (i < nslabs);
	return 0;
}

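/*
 * Allocate a coherent DMA buffer.  If the buffer Xen handed us is not
 * machine-contiguous or does not fit the device's coherent DMA mask,
 * exchange it with the hypervisor for a contiguous region that does.
 */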
static void *
xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags,
			   unsigned long attrs)
{
	void *ret;
	int order = get_order(size);
	u64 dma_mask = DMA_BIT_MASK(32);
	phys_addr_t phys;
	dma_addr_t dev_addr;

	/*
	 * Ignore region specifiers - the kernel's idea of the pseudo-phys
	 * memory layout has nothing to do with the machine physical layout.
	 * We can't allocate highmem because we can't return a pointer to it.
	 */
	flags &= ~(__GFP_DMA | __GFP_HIGHMEM);

	/* Round the size up to what is actually allocated. */
	size = 1UL << (order + XEN_PAGE_SHIFT);

	/* On ARM this function returns an ioremap'ped virtual address for
	 * which virt_to_phys doesn't return the corresponding physical
	 * address. In fact on ARM virt_to_phys only works for kernel direct
	 * mapped RAM memory. Also see comment below.
	 */
	ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs);

	if (!ret)
		return ret;

	if (hwdev && hwdev->coherent_dma_mask)
		dma_mask = hwdev->coherent_dma_mask;

	/* At this point dma_handle is the dma address, next we are
	 * going to set it to the machine address.
	 * Do not use virt_to_phys(ret) because on ARM it doesn't correspond
	 * to *dma_handle. */
	phys = dma_to_phys(hwdev, *dma_handle);
	dev_addr = xen_phys_to_dma(hwdev, phys);
	if ((dev_addr + size - 1 <= dma_mask) &&
	    !range_straddles_page_boundary(phys, size))
		*dma_handle = dev_addr;
	else {
		if (xen_create_contiguous_region(phys, order,
						 fls64(dma_mask), dma_handle) != 0) {
			xen_free_coherent_pages(hwdev, size, ret,
						(dma_addr_t)phys, attrs);
			return NULL;
		}
		*dma_handle = phys_to_dma(hwdev, *dma_handle);
		SetPageXenRemapped(virt_to_page(ret));
	}
	memset(ret, 0, size);
	return ret;
}

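/*
 * Free a buffer obtained from xen_swiotlb_alloc_coherent, undoing the
 * contiguous-region exchange if one was made at allocation time.
 */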
static void
xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
			  dma_addr_t dev_addr, unsigned long attrs)
{
	int order = get_order(size);
	phys_addr_t phys;
	u64 dma_mask = DMA_BIT_MASK(32);
	struct page *page;

	if (hwdev && hwdev->coherent_dma_mask)
		dma_mask = hwdev->coherent_dma_mask;

	/* Do not use virt_to_phys because on ARM it doesn't return the
	 * physical address. */
	phys = xen_dma_to_phys(hwdev, dev_addr);

	/* Round the size up to what was actually allocated. */
	size = 1UL << (order + XEN_PAGE_SHIFT);

	if (is_vmalloc_addr(vaddr))
		page = vmalloc_to_page(vaddr);
	else
		page = virt_to_page(vaddr);

	if (!WARN_ON((dev_addr + size - 1 > dma_mask) ||
		     range_straddles_page_boundary(phys, size)) &&
	    TestClearPageXenRemapped(page))
		xen_destroy_contiguous_region(phys, order);

	xen_free_coherent_pages(hwdev, size, vaddr, phys_to_dma(hwdev, phys),
				attrs);
}

/*
 * Map a single buffer of the indicated size for DMA in streaming mode.  The
 * physical address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either xen_swiotlb_unmap_page or xen_swiotlb_sync_single_for_cpu is
 * performed.
 */
static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
				       unsigned long offset, size_t size,
				       enum dma_data_direction dir,
				       unsigned long attrs)
{
	phys_addr_t map, phys = page_to_phys(page) + offset;
	dma_addr_t dev_addr = xen_phys_to_dma(dev, phys);

	BUG_ON(dir == DMA_NONE);
	/*
	 * If the address happens to be in the device's DMA window,
	 * we can safely return the device addr and not worry about bounce
	 * buffering it.
	 */
	if (dma_capable(dev, dev_addr, size, true) &&
	    !range_straddles_page_boundary(phys, size) &&
	    !xen_arch_need_swiotlb(dev, phys, dev_addr) &&
	    !is_swiotlb_force_bounce(dev))
		goto done;

	/*
	 * Oh well, have to allocate and map a bounce buffer.
	 */
	trace_swiotlb_bounced(dev, dev_addr, size);

	map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
	if (map == (phys_addr_t)DMA_MAPPING_ERROR)
		return DMA_MAPPING_ERROR;

	phys = map;
	dev_addr = xen_phys_to_dma(dev, map);

	/*
	 * Ensure that the address returned is DMA'ble.
	 */
	if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
		swiotlb_tbl_unmap_single(dev, map, size, dir,
					 attrs | DMA_ATTR_SKIP_CPU_SYNC);
		return DMA_MAPPING_ERROR;
	}

done:
	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
			arch_sync_dma_for_device(phys, size, dir);
		else
			xen_dma_sync_for_device(dev, dev_addr, size, dir);
	}
	return dev_addr;
}

/*
 * Unmap a single streaming mode DMA translation.  The dma_addr and size must
 * match what was provided in a previous xen_swiotlb_map_page call.  All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);

	BUG_ON(dir == DMA_NONE);

	if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
			arch_sync_dma_for_cpu(paddr, size, dir);
		else
			xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
	}

	/* NOTE: We use dev_addr here, not paddr! */
	if (is_xen_swiotlb_buffer(hwdev, dev_addr))
		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
}

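/*
 * Sync a single mapping for CPU or device access: perform any architecture
 * cache maintenance required and, if the buffer was bounced, let the
 * swiotlb core copy between the bounce buffer and the original memory.
 */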
static void
xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
		size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);

	if (!dev_is_dma_coherent(dev)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
			arch_sync_dma_for_cpu(paddr, size, dir);
		else
			xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
	}

	if (is_xen_swiotlb_buffer(dev, dma_addr))
		swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
}

static void
xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
		size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);

	if (is_xen_swiotlb_buffer(dev, dma_addr))
		swiotlb_sync_single_for_device(dev, paddr, size, dir);

	if (!dev_is_dma_coherent(dev)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
			arch_sync_dma_for_device(paddr, size, dir);
		else
			xen_dma_sync_for_device(dev, dma_addr, size, dir);
	}
}

/*
 * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
 * concerning calls here are the same as for xen_swiotlb_unmap_page() above.
 */
static void
xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(dir == DMA_NONE);

	for_each_sg(sgl, sg, nelems, i)
		xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
				dir, attrs);
}

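/*
 * Map a scatterlist for DMA by mapping each segment individually with
 * xen_swiotlb_map_page; segments are not merged.  On failure, every
 * segment mapped so far is unmapped and -EIO is returned.
 */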
static int
xen_swiotlb_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(dir == DMA_NONE);

	for_each_sg(sgl, sg, nelems, i) {
		sg->dma_address = xen_swiotlb_map_page(dev, sg_page(sg),
				sg->offset, sg->length, dir, attrs);
		if (sg->dma_address == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(sg) = sg->length;
	}

	return nelems;
out_unmap:
	xen_swiotlb_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	sg_dma_len(sgl) = 0;
	return -EIO;
}

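/*
 * Synchronize a scatterlist by syncing each segment individually.
 */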
static void
xen_swiotlb_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
			    int nelems, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nelems, i) {
		xen_swiotlb_sync_single_for_cpu(dev, sg->dma_address,
				sg->length, dir);
	}
}

static void
xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
			       int nelems, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nelems, i) {
		xen_swiotlb_sync_single_for_device(dev, sg->dma_address,
				sg->length, dir);
	}
}

/*
 * Return whether the given device DMA address mask can be supported
 * properly.  For example, if your device can only drive the low 24 bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
static int
xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	return xen_phys_to_dma(hwdev, io_tlb_default_mem.end - 1) <= mask;
}

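/* DMA mapping operations provided by swiotlb-xen. */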
const struct dma_map_ops xen_swiotlb_dma_ops = {
	.alloc = xen_swiotlb_alloc_coherent,
	.free = xen_swiotlb_free_coherent,
	.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
	.sync_single_for_device = xen_swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
	.map_sg = xen_swiotlb_map_sg,
	.unmap_sg = xen_swiotlb_unmap_sg,
	.map_page = xen_swiotlb_map_page,
	.unmap_page = xen_swiotlb_unmap_page,
	.dma_supported = xen_swiotlb_dma_supported,
	.mmap = dma_common_mmap,
	.get_sgtable = dma_common_get_sgtable,
	.alloc_pages = dma_common_alloc_pages,
	.free_pages = dma_common_free_pages,
};