Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0
a8463d4b | 2 | /*
bc3ec75d | 3 | * Copyright (C) 2018 Christoph Hellwig.
bc3ec75d | 4 | *
bc3ec75d | 5 | * DMA operations that map physical memory directly without using an IOMMU.
a8463d4b | 6 | */
57c8a661 | 7 | #include <linux/memblock.h> /* for max_pfn */ |
a8463d4b | 8 | #include <linux/export.h>
a8463d4b | 9 | #include <linux/mm.h>
2e86a047 | 10 | #include <linux/dma-direct.h> |
a8463d4b | 11 | #include <linux/scatterlist.h> |
080321d3 | 12 | #include <linux/dma-contiguous.h> |
bc3ec75d | 13 | #include <linux/dma-noncoherent.h> |
25f1e188 | 14 | #include <linux/pfn.h> |
c10f07aa | 15 | #include <linux/set_memory.h> |
55897af6 | 16 | #include <linux/swiotlb.h> |
a8463d4b | 17 | |
c61e9637 | 18 | /*
c61e9637 | 19 | * Most architectures use ZONE_DMA for the first 16 Megabytes, but
c61e9637 | 20 | * some use it for entirely different regions:
c61e9637 | 21 | */
c61e9637 | 22 | #ifndef ARCH_ZONE_DMA_BITS
c61e9637 | 23 | #define ARCH_ZONE_DMA_BITS 24
c61e9637 | 24 | #endif
c61e9637 | 25 |
c10f07aa | 26 | /*
c10f07aa | 27 | * For AMD SEV all DMA must be to unencrypted addresses.
c10f07aa | 28 | */
c10f07aa | 29 | static inline bool force_dma_unencrypted(void)
c10f07aa | 30 | {
c10f07aa | 31 | return sev_active();
c10f07aa | 32 | }
c10f07aa | 33 |
58dfd4ac | 34 | static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size) |
27975969 | 35 | { |
58dfd4ac | 36 | if (!dev->dma_mask) {
58dfd4ac | 37 | dev_err_once(dev, "DMA map on device without dma_mask\n");
58dfd4ac | 38 | } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
58dfd4ac | 39 | dev_err_once(dev,
58dfd4ac | 40 | "overflow %pad+%zu of DMA mask %llx bus mask %llx\n",
58dfd4ac | 41 | &dma_addr, size, *dev->dma_mask, dev->bus_dma_mask);
27975969 | 42 | } |
58dfd4ac | 43 | WARN_ON_ONCE(1); |
27975969 | 44 | }
27975969 | 45 |
a20bb058 | 46 | static inline dma_addr_t phys_to_dma_direct(struct device *dev,
a20bb058 | 47 | phys_addr_t phys)
a20bb058 | 48 | {
a20bb058 | 49 | if (force_dma_unencrypted())
a20bb058 | 50 | return __phys_to_dma(dev, phys);
a20bb058 | 51 | return phys_to_dma(dev, phys);
a20bb058 | 52 | }
a20bb058 | 53 |
a20bb058 | 54 | u64 dma_direct_get_required_mask(struct device *dev)
a20bb058 | 55 | {
a20bb058 | 56 | u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);
a20bb058 | 57 |
b4ebe606 | 58 | if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma)
b4ebe606 | 59 | max_dma = dev->bus_dma_mask;
b4ebe606 | 60 |
a20bb058 | 61 | return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
a20bb058 | 62 | }
a20bb058 | 63 |
7d21ee4c | 64 | static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
7d21ee4c | 65 | u64 *phys_mask)
7d21ee4c | 66 | {
b4ebe606 | 67 | if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
b4ebe606 | 68 | dma_mask = dev->bus_dma_mask;
b4ebe606 | 69 |
7d21ee4c | 70 | if (force_dma_unencrypted())
7d21ee4c | 71 | *phys_mask = __dma_to_phys(dev, dma_mask);
7d21ee4c | 72 | else
7d21ee4c | 73 | *phys_mask = dma_to_phys(dev, dma_mask);
7d21ee4c | 74 |
79ac32a4 | 75 | /*
79ac32a4 | 76 | * Optimistically try the zone that the physical address mask falls
79ac32a4 | 77 | * into first. If that returns memory that isn't actually addressable
79ac32a4 | 78 | * we will fall back to the next lower zone and try again.
79ac32a4 | 79 | *
79ac32a4 | 80 | * Note that GFP_DMA32 and GFP_DMA are no-ops without the corresponding
79ac32a4 | 81 | * zones.
79ac32a4 | 82 | */
7d21ee4c | 83 | if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
7d21ee4c | 84 | return GFP_DMA;
7d21ee4c | 85 | if (*phys_mask <= DMA_BIT_MASK(32))
7d21ee4c | 86 | return GFP_DMA32;
7d21ee4c | 87 | return 0;
7d21ee4c | 88 | }
7d21ee4c | 89 |
95f18391 | 90 | static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
95f18391 | 91 | {
a20bb058 | 92 | return phys_to_dma_direct(dev, phys) + size - 1 <= |
b4ebe606 | 93 | min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask); |
95f18391 | 94 | }
95f18391 | 95 |
b18814e7 | 96 | struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, |
bc3ec75d | 97 | dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) |
a8463d4b | 98 | { |
080321d3 | 99 | struct page *page = NULL; |
7d21ee4c | 100 | u64 phys_mask; |
a8463d4b | 101 | |
b9fd0426 | 102 | if (attrs & DMA_ATTR_NO_WARN)
b9fd0426 | 103 | gfp |= __GFP_NOWARN;
b9fd0426 | 104 |
e89f5b37 | 105 | /* we always manually zero the memory once we are done: */
e89f5b37 | 106 | gfp &= ~__GFP_ZERO;
7d21ee4c | 107 | gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
7d21ee4c | 108 | &phys_mask);
95f18391 | 109 | again: |
b1d2dc00 | 110 | page = dma_alloc_contiguous(dev, size, gfp); |
95f18391 | 111 | if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { |
b1d2dc00 | 112 | dma_free_contiguous(dev, page, size); |
95f18391 | 113 | page = NULL;
95f18391 | 114 |
de7eab30 | 115 | if (IS_ENABLED(CONFIG_ZONE_DMA32) && |
7d21ee4c | 116 | phys_mask < DMA_BIT_MASK(64) && |
de7eab30 | 117 | !(gfp & (GFP_DMA32 | GFP_DMA))) {
de7eab30 | 118 | gfp |= GFP_DMA32;
de7eab30 | 119 | goto again;
de7eab30 | 120 | }
de7eab30 | 121 |
fbce251b | 122 | if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) { |
95f18391 | 123 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
95f18391 | 124 | goto again;
95f18391 | 125 | }
95f18391 | 126 | }
95f18391 | 127 |
b18814e7 | 128 | return page;
b18814e7 | 129 | }
b18814e7 | 130 |
b18814e7 | 131 | void *dma_direct_alloc_pages(struct device *dev, size_t size,
b18814e7 | 132 | dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
b18814e7 | 133 | {
b18814e7 | 134 | struct page *page;
b18814e7 | 135 | void *ret;
b18814e7 | 136 |
b18814e7 | 137 | page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
080321d3 | 138 | if (!page)
080321d3 | 139 | return NULL;
b18814e7 | 140 | |
d98849af | 141 | if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
d98849af | 142 | /* remove any dirty cache lines on the kernel alias */
d98849af | 143 | if (!PageHighMem(page))
d98849af | 144 | arch_dma_prep_coherent(page, size);
d98849af | 145 | /* return the page pointer as the opaque cookie */
d98849af | 146 | return page;
d98849af | 147 | }
d98849af | 148 |
704f2c20 | 149 | if (PageHighMem(page)) {
704f2c20 | 150 | /*
704f2c20 | 151 | * Depending on the cma= arguments and per-arch setup
b1d2dc00 | 152 | * dma_alloc_contiguous could return highmem pages. |
704f2c20 | 153 | * Without remapping there is no way to return them here,
704f2c20 | 154 | * so log an error and fail.
704f2c20 | 155 | */
704f2c20 | 156 | dev_info(dev, "Rejecting highmem page from CMA.\n");
704f2c20 | 157 | __dma_direct_free_pages(dev, size, page);
704f2c20 | 158 | return NULL;
704f2c20 | 159 | }
704f2c20 | 160 |
c10f07aa | 161 | ret = page_address(page);
c10f07aa | 162 | if (force_dma_unencrypted()) {
b18814e7 | 163 | set_memory_decrypted((unsigned long)ret, 1 << get_order(size)); |
c10f07aa | 164 | *dma_handle = __phys_to_dma(dev, page_to_phys(page));
c10f07aa | 165 | } else {
c10f07aa | 166 | *dma_handle = phys_to_dma(dev, page_to_phys(page));
c10f07aa | 167 | }
c10f07aa | 168 | memset(ret, 0, size);
c30700db | 169 |
c30700db | 170 | if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
4b85faed | 171 | dma_alloc_need_uncached(dev, attrs)) { |
c30700db | 172 | arch_dma_prep_coherent(page, size);
c30700db | 173 | ret = uncached_kernel_address(ret);
c30700db | 174 | }
c30700db | 175 |
c10f07aa | 176 | return ret; |
a8463d4b | 177 | }
a8463d4b | 178 |
b18814e7 | 179 | void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
b18814e7 | 180 | {
b1d2dc00 | 181 | dma_free_contiguous(dev, page, size); |
b18814e7 | 182 | }
b18814e7 | 183 |
bc3ec75d | 184 | void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, |
002e6745 | 185 | dma_addr_t dma_addr, unsigned long attrs) |
a8463d4b | 186 | { |
c10f07aa | 187 | unsigned int page_order = get_order(size); |
080321d3 | 188 | |
d98849af | 189 | if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
d98849af | 190 | /* cpu_addr is a struct page cookie, not a kernel address */
d98849af | 191 | __dma_direct_free_pages(dev, size, cpu_addr);
d98849af | 192 | return;
d98849af | 193 | }
d98849af | 194 |
c10f07aa | 195 | if (force_dma_unencrypted())
c10f07aa | 196 | set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
c30700db | 197 |
c30700db | 198 | if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
4b85faed | 199 | dma_alloc_need_uncached(dev, attrs)) |
c30700db | 200 | cpu_addr = cached_kernel_address(cpu_addr); |
b18814e7 | 201 | __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr)); |
a8463d4b | 202 | }
a8463d4b | 203 |
bc3ec75d | 204 | void *dma_direct_alloc(struct device *dev, size_t size,
bc3ec75d | 205 | dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
bc3ec75d | 206 | {
c30700db | 207 | if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && |
c2f2124e | 208 | dma_alloc_need_uncached(dev, attrs)) |
bc3ec75d | 209 | return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
bc3ec75d | 210 | return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
bc3ec75d | 211 | }
bc3ec75d | 212 |
bc3ec75d | 213 | void dma_direct_free(struct device *dev, size_t size,
bc3ec75d | 214 | void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
bc3ec75d | 215 | {
c30700db | 216 | if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && |
c2f2124e | 217 | dma_alloc_need_uncached(dev, attrs)) |
bc3ec75d | 218 | arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
bc3ec75d | 219 | else
bc3ec75d | 220 | dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
bc3ec75d | 221 | }
bc3ec75d | 222 |
55897af6 | 223 | #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
55897af6 | 224 | defined(CONFIG_SWIOTLB)
55897af6 | 225 | void dma_direct_sync_single_for_device(struct device *dev,
bc3ec75d | 226 | dma_addr_t addr, size_t size, enum dma_data_direction dir)
bc3ec75d | 227 | {
55897af6 | 228 | phys_addr_t paddr = dma_to_phys(dev, addr);
55897af6 | 229 |
55897af6 | 230 | if (unlikely(is_swiotlb_buffer(paddr)))
55897af6 | 231 | swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
55897af6 | 232 |
55897af6 | 233 | if (!dev_is_dma_coherent(dev))
55897af6 | 234 | arch_sync_dma_for_device(dev, paddr, size, dir);
bc3ec75d | 235 | } |
356da6d0 | 236 | EXPORT_SYMBOL(dma_direct_sync_single_for_device); |
bc3ec75d | 237 | |
55897af6 | 238 | void dma_direct_sync_sg_for_device(struct device *dev, |
bc3ec75d | 239 | struct scatterlist *sgl, int nents, enum dma_data_direction dir)
bc3ec75d | 240 | {
bc3ec75d | 241 | struct scatterlist *sg;
bc3ec75d | 242 | int i;
bc3ec75d | 243 |
55897af6 | 244 | for_each_sg(sgl, sg, nents, i) {
55897af6 | 245 | if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
55897af6 | 246 | swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
55897af6 | 247 | dir, SYNC_FOR_DEVICE);
bc3ec75d | 248 | |
55897af6 | 249 | if (!dev_is_dma_coherent(dev))
55897af6 | 250 | arch_sync_dma_for_device(dev, sg_phys(sg), sg->length,
55897af6 | 251 | dir);
55897af6 | 252 | }
bc3ec75d | 253 | } |
356da6d0 | 254 | EXPORT_SYMBOL(dma_direct_sync_sg_for_device); |
17ac5247 | 255 | #endif |
bc3ec75d | 256 |
bc3ec75d | 257 | #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
55897af6 | 258 | defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
55897af6 | 259 | defined(CONFIG_SWIOTLB)
55897af6 | 260 | void dma_direct_sync_single_for_cpu(struct device *dev,
bc3ec75d | 261 | dma_addr_t addr, size_t size, enum dma_data_direction dir)
bc3ec75d | 262 | {
55897af6 | 263 | phys_addr_t paddr = dma_to_phys(dev, addr);
55897af6 | 264 |
55897af6 | 265 | if (!dev_is_dma_coherent(dev)) {
55897af6 | 266 | arch_sync_dma_for_cpu(dev, paddr, size, dir);
55897af6 | 267 | arch_sync_dma_for_cpu_all(dev);
55897af6 | 268 | }
55897af6 | 269 |
55897af6 | 270 | if (unlikely(is_swiotlb_buffer(paddr)))
55897af6 | 271 | swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
bc3ec75d | 272 | } |
356da6d0 | 273 | EXPORT_SYMBOL(dma_direct_sync_single_for_cpu); |
bc3ec75d | 274 | |
55897af6 | 275 | void dma_direct_sync_sg_for_cpu(struct device *dev, |
bc3ec75d | 276 | struct scatterlist *sgl, int nents, enum dma_data_direction dir)
bc3ec75d | 277 | {
bc3ec75d | 278 | struct scatterlist *sg;
bc3ec75d | 279 | int i;
bc3ec75d | 280 |
55897af6 | 281 | for_each_sg(sgl, sg, nents, i) {
55897af6 | 282 | if (!dev_is_dma_coherent(dev))
55897af6 | 283 | arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
55897af6 | 284 |
55897af6 | 285 | if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
55897af6 | 286 | swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, dir,
55897af6 | 287 | SYNC_FOR_CPU);
55897af6 | 288 | }
bc3ec75d | 289 | |
55897af6 | 290 | if (!dev_is_dma_coherent(dev))
55897af6 | 291 | arch_sync_dma_for_cpu_all(dev);
bc3ec75d | 292 | } |
356da6d0 | 293 | EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu); |
bc3ec75d | 294 | |
55897af6 | 295 | void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, |
bc3ec75d | 296 | size_t size, enum dma_data_direction dir, unsigned long attrs)
bc3ec75d | 297 | {
55897af6 | 298 | phys_addr_t phys = dma_to_phys(dev, addr);
55897af6 | 299 |
bc3ec75d | 300 | if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
bc3ec75d | 301 | dma_direct_sync_single_for_cpu(dev, addr, size, dir);
55897af6 | 302 |
55897af6 | 303 | if (unlikely(is_swiotlb_buffer(phys)))
55897af6 | 304 | swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
bc3ec75d | 305 | } |
356da6d0 | 306 | EXPORT_SYMBOL(dma_direct_unmap_page); |
bc3ec75d | 307 | |
55897af6 | 308 | void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, |
bc3ec75d | 309 | int nents, enum dma_data_direction dir, unsigned long attrs)
bc3ec75d | 310 | {
55897af6 | 311 | struct scatterlist *sg;
55897af6 | 312 | int i;
55897af6 | 313 |
55897af6 | 314 | for_each_sg(sgl, sg, nents, i)
55897af6 | 315 | dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
55897af6 | 316 | attrs);
bc3ec75d | 317 | } |
356da6d0 | 318 | EXPORT_SYMBOL(dma_direct_unmap_sg); |
bc3ec75d | 319 | #endif
bc3ec75d | 320 |
55897af6 | 321 | static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
55897af6 | 322 | size_t size)
55897af6 | 323 | {
55897af6 | 324 | return swiotlb_force != SWIOTLB_FORCE &&
d7e02a93 | 325 | dma_capable(dev, dma_addr, size); |
55897af6 | 326 | }
55897af6 | 327 |
782e6769 | 328 | dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, |
002e6745 | 329 | unsigned long offset, size_t size, enum dma_data_direction dir,
002e6745 | 330 | unsigned long attrs)
a8463d4b | 331 | { |
bc3ec75d | 332 | phys_addr_t phys = page_to_phys(page) + offset;
bc3ec75d | 333 | dma_addr_t dma_addr = phys_to_dma(dev, phys);
27975969 | 334 | |
55897af6 | 335 | if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
55897af6 | 336 | !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
58dfd4ac | 337 | report_addr(dev, dma_addr, size); |
b0cbeae4 | 338 | return DMA_MAPPING_ERROR; |
58dfd4ac | 339 | } |
bc3ec75d | 340 | |
55897af6 | 341 | if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
55897af6 | 342 | arch_sync_dma_for_device(dev, phys, size, dir);
27975969 | 343 | return dma_addr; |
a8463d4b | 344 | } |
356da6d0 | 345 | EXPORT_SYMBOL(dma_direct_map_page); |
a8463d4b | 346 | |
782e6769 | 347 | int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
782e6769 | 348 | enum dma_data_direction dir, unsigned long attrs)
a8463d4b | 349 | {
a8463d4b | 350 | int i;
a8463d4b | 351 | struct scatterlist *sg;
a8463d4b | 352 |
a8463d4b | 353 | for_each_sg(sgl, sg, nents, i) {
17ac5247 | 354 | sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
17ac5247 | 355 | sg->offset, sg->length, dir, attrs);
17ac5247 | 356 | if (sg->dma_address == DMA_MAPPING_ERROR)
55897af6 | 357 | goto out_unmap; |
a8463d4b | 358 | sg_dma_len(sg) = sg->length;
a8463d4b | 359 | }
a8463d4b | 360 |
a8463d4b | 361 | return nents;
55897af6 | 362 |
55897af6 | 363 | out_unmap:
55897af6 | 364 | dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
55897af6 | 365 | return 0;
a8463d4b | 366 | } |
356da6d0 | 367 | EXPORT_SYMBOL(dma_direct_map_sg); |
a8463d4b | 368 | |
cfced786 | 369 | dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
cfced786 | 370 | size_t size, enum dma_data_direction dir, unsigned long attrs)
cfced786 | 371 | {
cfced786 | 372 | dma_addr_t dma_addr = paddr;
cfced786 | 373 |
cfced786 | 374 | if (unlikely(!dma_direct_possible(dev, dma_addr, size))) {
cfced786 | 375 | report_addr(dev, dma_addr, size);
cfced786 | 376 | return DMA_MAPPING_ERROR;
cfced786 | 377 | }
cfced786 | 378 |
cfced786 | 379 | return dma_addr;
cfced786 | 380 | }
cfced786 | 381 | EXPORT_SYMBOL(dma_direct_map_resource);
cfced786 | 382 |
9d7a224b | 383 | /*
9d7a224b | 384 | * Because 32-bit DMA masks are so common we expect every architecture to be
9d7a224b | 385 | * able to satisfy them - either by not supporting more physical memory, or by
9d7a224b | 386 | * providing a ZONE_DMA32. If neither is the case, the architecture needs to
9d7a224b | 387 | * use an IOMMU instead of the direct mapping.
9d7a224b | 388 | */
1a9777a8 | 389 | int dma_direct_supported(struct device *dev, u64 mask)
1a9777a8 | 390 | {
9d7a224b | 391 | u64 min_mask;
9d7a224b | 392 |
9d7a224b | 393 | if (IS_ENABLED(CONFIG_ZONE_DMA))
9d7a224b | 394 | min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS);
9d7a224b | 395 | else
9d7a224b | 396 | min_mask = DMA_BIT_MASK(32);
9d7a224b | 397 |
9d7a224b | 398 | min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT);
9d7a224b | 399 |
c92a54cf | 400 | /*
c92a54cf | 401 | * This check needs to be against the actual bit mask value, so
c92a54cf | 402 | * use __phys_to_dma() here so that the SME encryption mask isn't
c92a54cf | 403 | * part of the check.
c92a54cf | 404 | */
c92a54cf | 405 | return mask >= __phys_to_dma(dev, min_mask);
1a9777a8 | 406 | } |
133d624b | 407 |
133d624b | 408 | size_t dma_direct_max_mapping_size(struct device *dev)
133d624b | 409 | {
133d624b | 410 | size_t size = SIZE_MAX;
133d624b | 411 |
133d624b | 412 | /* If SWIOTLB is active, use its maximum mapping size */
133d624b | 413 | if (is_swiotlb_active())
133d624b | 414 | size = swiotlb_max_mapping_size(dev);
133d624b | 415 |
133d624b | 416 | return size;
133d624b | 417 | }
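
The rounding in dma_direct_get_required_mask turns the highest reachable DMA address into a full bit mask: `(1ULL << (fls64(max_dma) - 1)) * 2 - 1` equals 2^fls64(max_dma) - 1, i.e. all bits up to and including the most significant set bit of max_dma. A minimal user-space sketch of the arithmetic; fls64_demo is a portable stand-in for the kernel's fls64(), not its implementation:

```c
#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for the kernel's fls64(): 1-based position of the
 * most significant set bit (0 for an all-zero value). */
static int fls64_demo(uint64_t x)
{
	int bit = 0;

	while (x) {
		x >>= 1;
		bit++;
	}
	return bit;
}

int main(void)
{
	/* Example: highest DMA address when the last page sits at ~9 GiB. */
	uint64_t max_dma = 0x23fffffffULL;
	uint64_t mask = (1ULL << (fls64_demo(max_dma) - 1)) * 2 - 1;

	/* 0x23fffffff has bit 33 set, so the required mask is 2^34 - 1. */
	printf("required mask: 0x%llx\n", (unsigned long long)mask);
	return 0;
}
```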
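__dma_direct_optimal_gfp_mask translates the physical address limit into an allocation zone: masks up to ARCH_ZONE_DMA_BITS (24 bits, i.e. 16 MiB, unless an architecture overrides it) get GFP_DMA, masks up to 32 bits get GFP_DMA32, and anything wider needs no zone flag. If the optimistic first attempt in __dma_direct_alloc_pages still returns an unreachable page, the `goto again` paths retry in the next lower zone. A stand-alone model of just the threshold logic; every demo_-prefixed name is ours, not the kernel's:

```c
#include <stdint.h>
#include <stdio.h>

#define demo_DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))
#define demo_ZONE_DMA_BITS 24	/* the ARCH_ZONE_DMA_BITS default */

/* Mirrors the zone-selection thresholds; returns a label, not a gfp_t. */
static const char *demo_optimal_zone(uint64_t phys_mask)
{
	if (phys_mask <= demo_DMA_BIT_MASK(demo_ZONE_DMA_BITS))
		return "GFP_DMA";
	if (phys_mask <= demo_DMA_BIT_MASK(32))
		return "GFP_DMA32";
	return "no zone flag";
}

int main(void)
{
	printf("24-bit device -> %s\n", demo_optimal_zone(demo_DMA_BIT_MASK(24)));
	printf("30-bit device -> %s\n", demo_optimal_zone(demo_DMA_BIT_MASK(30)));
	printf("64-bit device -> %s\n", demo_optimal_zone(demo_DMA_BIT_MASK(64)));
	return 0;
}
```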
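dma_coherent_ok accepts a candidate allocation only if its last byte is still addressable: the device address of the first byte plus size - 1 must not exceed the effective mask, where min_not_zero() picks the smaller of the coherent and bus masks while treating an unset (zero) bus mask as "no constraint". A user-space model, assuming an identity phys-to-DMA translation:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's min_not_zero(): the smaller nonzero operand. */
static uint64_t demo_min_not_zero(uint64_t a, uint64_t b)
{
	if (!a)
		return b;
	if (!b)
		return a;
	return a < b ? a : b;
}

/* Model of dma_coherent_ok() under an identity dma == phys mapping. */
static bool demo_coherent_ok(uint64_t phys, uint64_t size,
			     uint64_t coherent_mask, uint64_t bus_mask)
{
	return phys + size - 1 <= demo_min_not_zero(coherent_mask, bus_mask);
}

int main(void)
{
	uint64_t mask32 = (1ULL << 32) - 1;

	/* 64 KiB ending exactly at the 4 GiB boundary: fits. */
	printf("%d\n", demo_coherent_ok(0xffff0000ULL, 0x10000, mask32, 0));
	/* One byte higher: the last byte crosses the mask, rejected. */
	printf("%d\n", demo_coherent_ok(0xffff0001ULL, 0x10000, mask32, 0));
	return 0;
}
```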
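The SEV paths size set_memory_decrypted()/set_memory_encrypted() in whole pages as `1 << get_order(size)`: get_order() yields the smallest allocation order whose page count covers the requested size. A user-space model of that arithmetic, assuming 4 KiB pages:

```c
#include <stddef.h>
#include <stdio.h>

#define DEMO_PAGE_SHIFT 12	/* assume 4 KiB pages */

/* Model of the kernel's get_order(): smallest order such that
 * (1 << order) pages cover 'size' bytes. */
static int demo_get_order(size_t size)
{
	size_t pages = (size + (1UL << DEMO_PAGE_SHIFT) - 1) >> DEMO_PAGE_SHIFT;
	int order = 0;

	while ((1UL << order) < pages)
		order++;
	return order;
}

int main(void)
{
	printf("4096  bytes -> order %d (1 page)\n", demo_get_order(4096));
	printf("8192  bytes -> order %d (2 pages)\n", demo_get_order(8192));
	printf("12288 bytes -> order %d (4 pages)\n", demo_get_order(12288));
	return 0;
}
```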
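From a driver's point of view, dma_direct_alloc()/dma_direct_free() sit behind the generic dma_alloc_coherent()/dma_free_coherent() calls on systems that use the direct mapping. A hedged driver-side sketch; struct my_dev_ring and both function names are hypothetical:

```c
#include <linux/dma-mapping.h>
#include <linux/mm.h>		/* PAGE_SIZE */

/* Hypothetical driver state; only the DMA ring matters for this sketch. */
struct my_dev_ring {
	void		*vaddr;	/* CPU address, from dma_direct_alloc() */
	dma_addr_t	dma;	/* device-visible address */
};

/* On a direct-mapped (no IOMMU) system dma_alloc_coherent() ends up in
 * dma_direct_alloc() above, including zone selection and, under SEV,
 * set_memory_decrypted() on the new pages. */
static int my_dev_setup_ring(struct device *dev, struct my_dev_ring *ring)
{
	ring->vaddr = dma_alloc_coherent(dev, PAGE_SIZE, &ring->dma,
					 GFP_KERNEL);
	if (!ring->vaddr)
		return -ENOMEM;
	return 0;
}

static void my_dev_teardown_ring(struct device *dev, struct my_dev_ring *ring)
{
	/* Pairs with the allocation; reaches dma_direct_free() above. */
	dma_free_coherent(dev, PAGE_SIZE, ring->vaddr, ring->dma);
}
```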
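dma_direct_map_sg maps entry by entry and, on failure, jumps to out_unmap to release the i entries already mapped (passing DMA_ATTR_SKIP_CPU_SYNC, since no data was ever handed to the device) before returning 0, so callers only ever see all-or-nothing. A hedged driver-side sketch; my_dev_map_buffers is hypothetical:

```c
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* dma_map_sg() returns the number of mapped entries or 0 on failure;
 * with dma-direct a failure means dma_direct_map_sg() already unwound
 * every entry it had mapped, so no partial state leaks to the caller. */
static int my_dev_map_buffers(struct device *dev, struct scatterlist *sgl,
			      int nents)
{
	int mapped = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);

	if (!mapped)
		return -EIO;

	/* Program the device with sg_dma_address()/sg_dma_len() of each
	 * of the 'mapped' entries, which may be fewer than 'nents'. */
	return mapped;
}
```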
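The sync helpers implement the generic dma_sync_single_for_cpu()/for_device() entry points for the direct mapping: bounce through swiotlb when the address lands in its buffer, and invoke the architecture's cache maintenance when the device is not cache-coherent. A hedged sketch of the usual pattern around a long-lived streaming RX buffer; my_dev_rx_poll and its arguments are hypothetical:

```c
#include <linux/dma-mapping.h>

/* Classic streaming-DMA ownership dance around a persistent mapping. */
static void my_dev_rx_poll(struct device *dev, dma_addr_t buf_dma,
			   void *buf, size_t len)
{
	/* Give the buffer to the CPU: for dma-direct this is
	 * dma_direct_sync_single_for_cpu(), i.e. arch cache maintenance
	 * plus a swiotlb bounce-back if the buffer was bounced. */
	dma_sync_single_for_cpu(dev, buf_dma, len, DMA_FROM_DEVICE);

	/* CPU may now safely read the received data through buf. */
	pr_debug("first byte: %#x\n", *(unsigned char *)buf);

	/* Hand the buffer back to the device for the next transfer. */
	dma_sync_single_for_device(dev, buf_dma, len, DMA_FROM_DEVICE);
}
```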