// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Christoph Hellwig.
 *
 * DMA operations that map physical memory directly without using an IOMMU.
 */
#include <linux/memblock.h> /* for max_pfn */
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
#include <linux/set_memory.h>
#include <linux/swiotlb.h>

/*
 * Most architectures use ZONE_DMA for the first 16 Megabytes, but
 * some use it for entirely different regions:
 */
#ifndef ARCH_ZONE_DMA_BITS
#define ARCH_ZONE_DMA_BITS 24
#endif

/*
 * For AMD SEV all DMA must be to unencrypted addresses.
 */
static inline bool force_dma_unencrypted(void)
{
	return sev_active();
}

static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
	if (!dev->dma_mask) {
		dev_err_once(dev, "DMA map on device without dma_mask\n");
	} else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
		dev_err_once(dev,
			"overflow %pad+%zu of DMA mask %llx bus mask %llx\n",
			&dma_addr, size, *dev->dma_mask, dev->bus_dma_mask);
	}
	WARN_ON_ONCE(1);
}

static inline dma_addr_t phys_to_dma_direct(struct device *dev,
		phys_addr_t phys)
{
	if (force_dma_unencrypted())
		return __phys_to_dma(dev, phys);
	return phys_to_dma(dev, phys);
}

u64 dma_direct_get_required_mask(struct device *dev)
{
	u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);

	if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma)
		max_dma = dev->bus_dma_mask;

	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}
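
/*
 * Worked example (illustrative, not part of the original source; assumes
 * an identity phys<->dma translation and 4 KiB pages): on a machine with
 * 8 GiB of RAM and no bus_dma_mask, the highest DMA address is just below
 * 1ULL << 33, so fls64(max_dma) == 33 and the rounding above reports a
 * full 33-bit mask:
 *
 *	(1ULL << (33 - 1)) * 2 - 1 == 0x1ffffffff == DMA_BIT_MASK(33)
 */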

static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
		u64 *phys_mask)
{
	if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
		dma_mask = dev->bus_dma_mask;

	if (force_dma_unencrypted())
		*phys_mask = __dma_to_phys(dev, dma_mask);
	else
		*phys_mask = dma_to_phys(dev, dma_mask);

	/*
	 * Optimistically try the zone that the physical address mask falls
	 * into first.  If that returns memory that isn't actually addressable
	 * we will fall back to the next lower zone and try again.
	 *
	 * Note that GFP_DMA32 and GFP_DMA are no-ops without the corresponding
	 * zones.
	 */
	if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
		return GFP_DMA;
	if (*phys_mask <= DMA_BIT_MASK(32))
		return GFP_DMA32;
	return 0;
}
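
/*
 * Example of the zone selection above (illustrative, assuming an identity
 * phys<->dma translation): with the default 24-bit ARCH_ZONE_DMA_BITS, a
 * device with a 30-bit coherent mask gets *phys_mask == DMA_BIT_MASK(30),
 * which is above DMA_BIT_MASK(24) but within DMA_BIT_MASK(32), so the
 * first allocation attempt is made from ZONE_DMA32 via GFP_DMA32.
 */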

static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
	return phys_to_dma_direct(dev, phys) + size - 1 <=
			min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
}
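
/*
 * Example (illustrative, again assuming an identity translation): a 4 KiB
 * buffer at physical address 0xfffff000 passes a 32-bit coherent mask
 * because 0xfffff000 + 0x1000 - 1 == 0xffffffff; one page higher and it
 * would be rejected.  min_not_zero() skips a bus_dma_mask left at zero.
 */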

struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	int page_order = get_order(size);
	struct page *page = NULL;
	u64 phys_mask;

	if (attrs & DMA_ATTR_NO_WARN)
		gfp |= __GFP_NOWARN;

	/* we always manually zero the memory once we are done: */
	gfp &= ~__GFP_ZERO;
	gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
			&phys_mask);
again:
	/* CMA can be used only in the context which permits sleeping */
	if (gfpflags_allow_blocking(gfp)) {
		page = dma_alloc_from_contiguous(dev, count, page_order,
						 gfp & __GFP_NOWARN);
		if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
			dma_release_from_contiguous(dev, page, count);
			page = NULL;
		}
	}
	if (!page)
		page = alloc_pages_node(dev_to_node(dev), gfp, page_order);

	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
		__free_pages(page, page_order);
		page = NULL;

		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
		    phys_mask < DMA_BIT_MASK(64) &&
		    !(gfp & (GFP_DMA32 | GFP_DMA))) {
			gfp |= GFP_DMA32;
			goto again;
		}

		if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) {
			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
			goto again;
		}
	}

	return page;
}

void *dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	struct page *page;
	void *ret;

	page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
	if (!page)
		return NULL;

	if (PageHighMem(page)) {
		/*
		 * Depending on the cma= arguments and per-arch setup
		 * dma_alloc_from_contiguous could return highmem pages.
		 * Without remapping there is no way to return them here,
		 * so log an error and fail.
		 */
		dev_info(dev, "Rejecting highmem page from CMA.\n");
		__dma_direct_free_pages(dev, size, page);
		return NULL;
	}

	ret = page_address(page);
	if (force_dma_unencrypted()) {
		set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
	} else {
		*dma_handle = phys_to_dma(dev, page_to_phys(page));
	}
	memset(ret, 0, size);
	return ret;
}

void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;

	if (!dma_release_from_contiguous(dev, page, count))
		__free_pages(page, get_order(size));
}

void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t dma_addr, unsigned long attrs)
{
	unsigned int page_order = get_order(size);

	if (force_dma_unencrypted())
		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
	__dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
}

void *dma_direct_alloc(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	if (!dev_is_dma_coherent(dev))
		return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
	return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
}

void dma_direct_free(struct device *dev, size_t size,
		void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
	if (!dev_is_dma_coherent(dev))
		arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
	else
		dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}
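
/*
 * Typical call path (illustrative sketch, not part of this file): a
 * driver calling dma_alloc_coherent() ends up in dma_direct_alloc()
 * when its device uses the direct mapping:
 *
 *	void *cpu;
 *	dma_addr_t handle;
 *
 *	cpu = dma_alloc_coherent(dev, SZ_4K, &handle, GFP_KERNEL);
 *	if (!cpu)
 *		return -ENOMEM;
 *	...
 *	dma_free_coherent(dev, SZ_4K, cpu, handle);
 *
 * A cache-coherent device is served by dma_direct_alloc_pages() above;
 * a non-coherent one is routed through arch_dma_alloc().
 */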

#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_device(struct device *dev,
		dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	if (unlikely(is_swiotlb_buffer(paddr)))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_device(dev, paddr, size, dir);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_device);

void dma_direct_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
			swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
					dir, SYNC_FOR_DEVICE);

		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_device(dev, sg_phys(sg), sg->length,
					dir);
	}
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_device);
#endif

#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_cpu(struct device *dev,
		dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	if (!dev_is_dma_coherent(dev)) {
		arch_sync_dma_for_cpu(dev, paddr, size, dir);
		arch_sync_dma_for_cpu_all(dev);
	}

	if (unlikely(is_swiotlb_buffer(paddr)))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_cpu);

void dma_direct_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);

		if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
			swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, dir,
					SYNC_FOR_CPU);
	}

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu_all(dev);
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);

void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t phys = dma_to_phys(dev, addr);

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		dma_direct_sync_single_for_cpu(dev, addr, size, dir);

	if (unlikely(is_swiotlb_buffer(phys)))
		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_page);

void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i)
		dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
				attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif

static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
		size_t size)
{
	return swiotlb_force != SWIOTLB_FORCE &&
		(!dev || dma_capable(dev, dma_addr, size));
}

dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t dma_addr = phys_to_dma(dev, phys);

	if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
	    !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(dev, phys, size, dir);
	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_page);
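
/*
 * Streaming-mapping usage sketch (illustrative, not part of this file;
 * buf and len stand for a hypothetical driver buffer): drivers reach
 * dma_direct_map_page() through dma_map_single() or dma_map_page():
 *
 *	dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *
 *	if (dma_mapping_error(dev, addr))
 *		return -ENOMEM;
 *	... run the device transfer ...
 *	dma_unmap_single(dev, addr, len, DMA_TO_DEVICE);
 *
 * When the buffer is not directly addressable by the device,
 * swiotlb_map() above transparently bounces it through the SWIOTLB
 * pool instead of failing the mapping outright.
 */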

int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
		enum dma_data_direction dir, unsigned long attrs)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgl, sg, nents, i) {
		sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
				sg->offset, sg->length, dir, attrs);
		if (sg->dma_address == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(sg) = sg->length;
	}

	return nents;

out_unmap:
	dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	return 0;
}
EXPORT_SYMBOL(dma_direct_map_sg);
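
/*
 * Scatterlist usage sketch (illustrative; program_hw_desc() is a
 * hypothetical driver helper): after a successful dma_map_sg() the
 * caller walks the list with the dma_* accessors:
 *
 *	struct scatterlist *sg;
 *	int i, count;
 *
 *	count = dma_map_sg(dev, sgl, nents, DMA_FROM_DEVICE);
 *	for_each_sg(sgl, sg, count, i)
 *		program_hw_desc(hw, sg_dma_address(sg), sg_dma_len(sg));
 *
 * Note the unwind above: if any entry fails to map, everything mapped
 * so far is unmapped and 0 is returned, so a zero return must be
 * treated as complete failure.
 */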

dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t dma_addr = paddr;

	if (unlikely(!dma_direct_possible(dev, dma_addr, size))) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_resource);

/*
 * Because 32-bit DMA masks are so common we expect every architecture to be
 * able to satisfy them - either by not supporting more physical memory, or by
 * providing a ZONE_DMA32.  If neither is the case, the architecture needs to
 * use an IOMMU instead of the direct mapping.
 */
int dma_direct_supported(struct device *dev, u64 mask)
{
	u64 min_mask;

	if (IS_ENABLED(CONFIG_ZONE_DMA))
		min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS);
	else
		min_mask = DMA_BIT_MASK(32);

	min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT);

	/*
	 * This check needs to be against the actual bit mask value, so
	 * use __phys_to_dma() here so that the SME encryption mask isn't
	 * part of the check.
	 */
	return mask >= __phys_to_dma(dev, min_mask);
}
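
/*
 * Example (illustrative, assuming an identity dma translation): with
 * CONFIG_ZONE_DMA and the default 24-bit ARCH_ZONE_DMA_BITS, a device
 * advertising DMA_BIT_MASK(32) is accepted because it covers the whole
 * 24-bit minimum; a 16-bit mask is refused unless the machine has so
 * little memory that (max_pfn - 1) << PAGE_SHIFT fits below it.
 */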

size_t dma_direct_max_mapping_size(struct device *dev)
{
	size_t size = SIZE_MAX;

	/* If SWIOTLB is active, use its maximum mapping size */
	if (is_swiotlb_active())
		size = swiotlb_max_mapping_size(dev);

	return size;
}
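
/*
 * Illustrative note (not in the original source): callers reach this
 * through dma_max_mapping_size(), e.g.
 *
 *	size_t limit = dma_max_mapping_size(dev);
 *
 * With SWIOTLB active the limit is the largest contiguous bounce-buffer
 * allocation (IO_TLB_SEGSIZE slots, typically 256 KiB), which lets
 * subsystems such as the block layer size their requests so that
 * bouncing cannot fail.
 */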