// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2010
 * by Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 *
 * This code provides an IOMMU for Xen PV guests with PCI passthrough.
 *
 * PV guests under Xen run on a non-contiguous memory architecture.
 *
 * When PCI pass-through is utilized, this necessitates an IOMMU for
 * translating bus (DMA) to virtual and vice-versa and also providing a
 * mechanism to have contiguous pages for device driver operations (say DMA
 * operations).
 *
 * Specifically, under Xen the Linux idea of pages is an illusion. It
 * assumes that pages start at zero and go up to the available memory. To
 * help with that, the Linux Xen MMU provides a lookup mechanism to
 * translate the page frame numbers (PFN) to machine frame numbers (MFN)
 * and vice-versa. The MFNs are the "real" frame numbers. Furthermore,
 * memory is not contiguous: the Xen hypervisor stitches memory for guests
 * from different pools, which means there is no guarantee that PFN==MFN
 * and PFN+1==MFN+1. Lastly, with Xen 4.0, pages (in debug mode) are
 * allocated in descending order (high to low), meaning the guest might
 * never get any MFNs under the 4GB mark.
 */
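
/*
 * For example (hypothetical numbers): guest PFN 0x10000 may be backed by
 * MFN 0x87654 while guest PFN 0x10001 is backed by an entirely unrelated
 * MFN, so a buffer spanning the two pages is not machine-contiguous even
 * though it is pseudo-physically contiguous. This is why the helpers
 * below always translate through pfn_to_bfn()/bfn_to_pfn() rather than
 * assuming an identity mapping.
 */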

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/memblock.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/hvc-console.h>

#include <asm/dma-mapping.h>

#include <trace/events/swiotlb.h>

#define MAX_DMA_BITS 32

/*
 * Quick lookup of the bus address of the IOTLB.
 */

static inline phys_addr_t xen_phys_to_bus(struct device *dev, phys_addr_t paddr)
{
        unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr));
        phys_addr_t baddr = (phys_addr_t)bfn << XEN_PAGE_SHIFT;

        baddr |= paddr & ~XEN_PAGE_MASK;
        return baddr;
}

static inline dma_addr_t xen_phys_to_dma(struct device *dev, phys_addr_t paddr)
{
        return phys_to_dma(dev, xen_phys_to_bus(dev, paddr));
}

static inline phys_addr_t xen_bus_to_phys(struct device *dev,
                                          phys_addr_t baddr)
{
        unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr));
        phys_addr_t paddr = (xen_pfn << XEN_PAGE_SHIFT) |
                            (baddr & ~XEN_PAGE_MASK);

        return paddr;
}

static inline phys_addr_t xen_dma_to_phys(struct device *dev,
                                          dma_addr_t dma_addr)
{
        return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
}

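/*
 * Returns 1 if the machine frames backing [p, p + size) are not
 * contiguous, i.e. the buffer cannot be handed to the device as one
 * DMA range and needs bounce buffering.
 */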
static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
{
        unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p);
        unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size);

        next_bfn = pfn_to_bfn(xen_pfn);

        for (i = 1; i < nr_pages; i++)
                if (pfn_to_bfn(++xen_pfn) != ++next_bfn)
                        return 1;

        return 0;
}

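/*
 * Check whether a DMA address was allocated from this domain's swiotlb
 * bounce buffer.
 */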
static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
{
        unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
        unsigned long xen_pfn = bfn_to_local_pfn(bfn);
        phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;

        /*
         * If the address is outside our domain, it CAN have the same
         * virtual address as another address in our domain. Therefore
         * _only_ check addresses within our domain.
         */
        if (pfn_valid(PFN_DOWN(paddr)))
                return is_swiotlb_buffer(dev, paddr);
        return 0;
}

#ifdef CONFIG_X86
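/*
 * Remap the kernel-allocated swiotlb buffer so that each IO_TLB_SEGSIZE
 * slab is machine-contiguous and addressable within dma_bits, retrying
 * the exchange with a progressively wider address restriction (up to
 * MAX_DMA_BITS) until the hypervisor succeeds.
 */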
int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
{
        int rc;
        unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT);
        unsigned int i, dma_bits = order + PAGE_SHIFT;
        dma_addr_t dma_handle;
        phys_addr_t p = virt_to_phys(buf);

        BUILD_BUG_ON(IO_TLB_SEGSIZE & (IO_TLB_SEGSIZE - 1));
        BUG_ON(nslabs % IO_TLB_SEGSIZE);

        i = 0;
        do {
                do {
                        rc = xen_create_contiguous_region(
                                p + (i << IO_TLB_SHIFT), order,
                                dma_bits, &dma_handle);
                } while (rc && dma_bits++ < MAX_DMA_BITS);
                if (rc)
                        return rc;

                i += IO_TLB_SEGSIZE;
        } while (i < nslabs);
        return 0;
}

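/*
 * Allocate coherent DMA memory: take pages from the normal allocator
 * and, if the range is not machine-contiguous or exceeds the device's
 * coherent mask, exchange the underlying frames with the hypervisor.
 * Remapped pages are marked with SetPageXenRemapped() so the exchange
 * can be undone on free.
 */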
static void *
xen_swiotlb_alloc_coherent(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
        u64 dma_mask = dev->coherent_dma_mask;
        int order = get_order(size);
        phys_addr_t phys;
        void *ret;

        /* Align the allocation to the Xen page size */
        size = 1UL << (order + XEN_PAGE_SHIFT);

        ret = (void *)__get_free_pages(flags, get_order(size));
        if (!ret)
                return ret;
        phys = virt_to_phys(ret);

        *dma_handle = xen_phys_to_dma(dev, phys);
        if (*dma_handle + size - 1 > dma_mask ||
            range_straddles_page_boundary(phys, size)) {
                if (xen_create_contiguous_region(phys, order, fls64(dma_mask),
                                dma_handle) != 0)
                        goto out_free_pages;
                SetPageXenRemapped(virt_to_page(ret));
        }

        memset(ret, 0, size);
        return ret;

out_free_pages:
        free_pages((unsigned long)ret, get_order(size));
        return NULL;
}

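/*
 * Free memory obtained from xen_swiotlb_alloc_coherent(), undoing the
 * contiguous-region exchange first if the page had been remapped.
 */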
static void
xen_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr,
                dma_addr_t dma_handle, unsigned long attrs)
{
        phys_addr_t phys = virt_to_phys(vaddr);
        int order = get_order(size);

        /* Convert the size to what was actually allocated. */
        size = 1UL << (order + XEN_PAGE_SHIFT);

        if (WARN_ON_ONCE(dma_handle + size - 1 > dev->coherent_dma_mask) ||
            WARN_ON_ONCE(range_straddles_page_boundary(phys, size)))
                return;

        if (TestClearPageXenRemapped(virt_to_page(vaddr)))
                xen_destroy_contiguous_region(phys, order);
        free_pages((unsigned long)vaddr, get_order(size));
}
#endif /* CONFIG_X86 */

/*
 * Map a single buffer of the indicated size for DMA in streaming mode. The
 * physical address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either xen_swiotlb_unmap_page or xen_swiotlb_sync_single_for_{cpu,device}
 * is performed.
 */
static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
                                       unsigned long offset, size_t size,
                                       enum dma_data_direction dir,
                                       unsigned long attrs)
{
        phys_addr_t map, phys = page_to_phys(page) + offset;
        dma_addr_t dev_addr = xen_phys_to_dma(dev, phys);

        BUG_ON(dir == DMA_NONE);
        /*
         * If the address happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (dma_capable(dev, dev_addr, size, true) &&
            !range_straddles_page_boundary(phys, size) &&
            !xen_arch_need_swiotlb(dev, phys, dev_addr) &&
            !is_swiotlb_force_bounce(dev))
                goto done;

        /*
         * Oh well, have to allocate and map a bounce buffer.
         */
        trace_swiotlb_bounced(dev, dev_addr, size);

        map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
        if (map == (phys_addr_t)DMA_MAPPING_ERROR)
                return DMA_MAPPING_ERROR;

        phys = map;
        dev_addr = xen_phys_to_dma(dev, map);

        /*
         * Ensure that the address returned is DMA'ble
         */
        if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
                swiotlb_tbl_unmap_single(dev, map, size, dir,
                                attrs | DMA_ATTR_SKIP_CPU_SYNC);
                return DMA_MAPPING_ERROR;
        }

done:
        if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
                        arch_sync_dma_for_device(phys, size, dir);
                else
                        xen_dma_sync_for_device(dev, dev_addr, size, dir);
        }
        return dev_addr;
}

/*
 * Unmap a single streaming mode DMA translation. The dma_addr and size must
 * match what was provided in a previous xen_swiotlb_map_page call. All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
                size_t size, enum dma_data_direction dir, unsigned long attrs)
{
        phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);

        BUG_ON(dir == DMA_NONE);

        if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
                        arch_sync_dma_for_cpu(paddr, size, dir);
                else
                        xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
        }

        /* NOTE: We use dev_addr here, not paddr! */
        if (is_xen_swiotlb_buffer(hwdev, dev_addr))
                swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
}

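/*
 * Cache maintenance for the sync helpers below: on non-coherent devices,
 * pages with a valid local PFN are synced with the regular
 * arch_sync_dma_*() routines, while foreign pages are handed to the
 * Xen-specific xen_dma_sync_*() helpers. Any bounce-buffer copy is done
 * by the generic swiotlb code on top of that.
 */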
static void
xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
                size_t size, enum dma_data_direction dir)
{
        phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);

        if (!dev_is_dma_coherent(dev)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
                        arch_sync_dma_for_cpu(paddr, size, dir);
                else
                        xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
        }

        if (is_xen_swiotlb_buffer(dev, dma_addr))
                swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
}

static void
xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
                size_t size, enum dma_data_direction dir)
{
        phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);

        if (is_xen_swiotlb_buffer(dev, dma_addr))
                swiotlb_sync_single_for_device(dev, paddr, size, dir);

        if (!dev_is_dma_coherent(dev)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
                        arch_sync_dma_for_device(paddr, size, dir);
                else
                        xen_dma_sync_for_device(dev, dma_addr, size, dir);
        }
}

/*
 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
 * concerning calls here are the same as for xen_swiotlb_unmap_page() above.
 */
static void
xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
                enum dma_data_direction dir, unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i)
                xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
                                dir, attrs);
}

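/*
 * Map a scatter-gather list: each segment is mapped individually through
 * xen_swiotlb_map_page(), so segments that are not DMA-capable for this
 * device or not machine-contiguous are bounced one by one. On failure,
 * everything mapped so far is unwound.
 */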
static int
xen_swiotlb_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
                enum dma_data_direction dir, unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i) {
                sg->dma_address = xen_swiotlb_map_page(dev, sg_page(sg),
                                sg->offset, sg->length, dir, attrs);
                if (sg->dma_address == DMA_MAPPING_ERROR)
                        goto out_unmap;
                sg_dma_len(sg) = sg->length;
        }

        return nelems;
out_unmap:
        xen_swiotlb_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
        sg_dma_len(sgl) = 0;
        return -EIO;
}

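/*
 * Sync a scatter-gather list by syncing each segment in turn.
 */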
static void
xen_swiotlb_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
                int nelems, enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nelems, i) {
                xen_swiotlb_sync_single_for_cpu(dev, sg->dma_address,
                                sg->length, dir);
        }
}

static void
xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
                int nelems, enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nelems, i) {
                xen_swiotlb_sync_single_for_device(dev, sg->dma_address,
                                sg->length, dir);
        }
}

/*
 * Return whether the given device DMA address mask can be supported
 * properly. For example, if your device can only drive the low 24 bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
static int
xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
        return xen_phys_to_dma(hwdev, default_swiotlb_limit()) <= mask;
}

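/*
 * The DMA ops exported for Xen swiotlb. Only x86 needs the Xen-aware
 * coherent allocators above; other architectures use the generic
 * dma-direct ones.
 */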
const struct dma_map_ops xen_swiotlb_dma_ops = {
#ifdef CONFIG_X86
        .alloc = xen_swiotlb_alloc_coherent,
        .free = xen_swiotlb_free_coherent,
#else
        .alloc = dma_direct_alloc,
        .free = dma_direct_free,
#endif
        .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
        .sync_single_for_device = xen_swiotlb_sync_single_for_device,
        .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
        .map_sg = xen_swiotlb_map_sg,
        .unmap_sg = xen_swiotlb_unmap_sg,
        .map_page = xen_swiotlb_map_page,
        .unmap_page = xen_swiotlb_unmap_page,
        .dma_supported = xen_swiotlb_dma_supported,
        .mmap = dma_common_mmap,
        .get_sgtable = dma_common_get_sgtable,
        .alloc_pages = dma_common_alloc_pages,
        .free_pages = dma_common_free_pages,
};