Merge tag 'dma-mapping-4.20-1' of git://git.infradead.org/users/hch/dma-mapping
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Oct 2018 18:29:17 +0000 (11:29 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Oct 2018 18:29:17 +0000 (11:29 -0700)
Pull more dma-mapping updates from Christoph Hellwig:

 - various swiotlb cleanups

 - do not dip into the swiotlb pool for dma coherent allocations

 - add support for not cache coherent DMA to swiotlb

 - switch ARM64 to use the generic swiotlb_dma_ops

* tag 'dma-mapping-4.20-1' of git://git.infradead.org/users/hch/dma-mapping:
  arm64: use the generic swiotlb_dma_ops
  swiotlb: add support for non-coherent DMA
  swiotlb: don't dip into swiotlb pool for coherent allocations
  swiotlb: refactor swiotlb_map_page
  swiotlb: use swiotlb_map_page in swiotlb_map_sg_attrs
  swiotlb: merge swiotlb_unmap_page and unmap_single
  swiotlb: remove the overflow buffer
  swiotlb: do not panic on mapping failures
  swiotlb: mark is_swiotlb_buffer static
  swiotlb: remove a pointless comment

arch/arm64/Kconfig
arch/arm64/include/asm/device.h
arch/arm64/include/asm/dma-mapping.h
arch/arm64/mm/dma-mapping.c
arch/powerpc/kernel/dma-swiotlb.c
include/linux/dma-direct.h
include/linux/swiotlb.h
kernel/dma/direct.c
kernel/dma/swiotlb.c

index c03cd0d765d3e9bf7060e6efbae40dd6c019102f..964f682a2b7b0b8da41248665d09980a1808983e 100644 (file)
@@ -11,6 +11,8 @@ config ARM64
        select ARCH_CLOCKSOURCE_DATA
        select ARCH_HAS_DEBUG_VIRTUAL
        select ARCH_HAS_DEVMEM_IS_ALLOWED
+       select ARCH_HAS_DMA_COHERENT_TO_PFN
+       select ARCH_HAS_DMA_MMAP_PGPROT
        select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_FAST_MULTIPLIER
@@ -24,6 +26,8 @@ config ARM64
        select ARCH_HAS_SG_CHAIN
        select ARCH_HAS_STRICT_KERNEL_RWX
        select ARCH_HAS_STRICT_MODULE_RWX
+       select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+       select ARCH_HAS_SYNC_DMA_FOR_CPU
        select ARCH_HAS_SYSCALL_WRAPPER
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
index 5a5fa47a6b18bbd5b29f6dde5fb9201a524ddc2e..3dd3d664c5c5dfc3ee759af069d248f09eba4e50 100644 (file)
@@ -23,7 +23,6 @@ struct dev_archdata {
 #ifdef CONFIG_XEN
        const struct dma_map_ops *dev_dma_ops;
 #endif
-       bool dma_coherent;
 };
 
 struct pdev_archdata {
index b7847eb8a7bb76d8602d7a328b478220e7e66120..c41f3fb1446cec797ccea57f7dbcaaf5472ea3c5 100644 (file)
@@ -44,10 +44,13 @@ void arch_teardown_dma_ops(struct device *dev);
 #define arch_teardown_dma_ops  arch_teardown_dma_ops
 #endif
 
-/* do not use this function in a driver */
+/*
+ * Do not use this function in a driver, it is only provided for
+ * arch/arm/mm/xen.c, which is used by arm64 as well.
+ */
 static inline bool is_device_dma_coherent(struct device *dev)
 {
-       return dev->archdata.dma_coherent;
+       return dev->dma_coherent;
 }
 
 #endif /* __KERNEL__ */
index cf017c5bb5e7ad76e5e934f02c84279bfa28ea57..d190612b8f33b2555ffe447bfa72421b34430c4c 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/genalloc.h>
 #include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/dma-contiguous.h>
 #include <linux/vmalloc.h>
 #include <linux/swiotlb.h>
 
 #include <asm/cacheflush.h>
 
-static int swiotlb __ro_after_init;
-
-static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
-                                bool coherent)
-{
-       if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
-               return pgprot_writecombine(prot);
-       return prot;
-}
-
 static struct gen_pool *atomic_pool __ro_after_init;
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
@@ -91,18 +82,16 @@ static int __free_from_pool(void *start, size_t size)
        return 1;
 }
 
-static void *__dma_alloc(struct device *dev, size_t size,
-                        dma_addr_t *dma_handle, gfp_t flags,
-                        unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+               gfp_t flags, unsigned long attrs)
 {
        struct page *page;
        void *ptr, *coherent_ptr;
-       bool coherent = is_device_dma_coherent(dev);
-       pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);
+       pgprot_t prot = pgprot_writecombine(PAGE_KERNEL);
 
        size = PAGE_ALIGN(size);
 
-       if (!coherent && !gfpflags_allow_blocking(flags)) {
+       if (!gfpflags_allow_blocking(flags)) {
                struct page *page = NULL;
                void *addr = __alloc_from_pool(size, &page, flags);
 
@@ -112,14 +101,10 @@ static void *__dma_alloc(struct device *dev, size_t size,
                return addr;
        }
 
-       ptr = swiotlb_alloc(dev, size, dma_handle, flags, attrs);
+       ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
        if (!ptr)
                goto no_mem;
 
-       /* no need for non-cacheable mapping if coherent */
-       if (coherent)
-               return ptr;
-
        /* remove any dirty cache lines on the kernel alias */
        __dma_flush_area(ptr, size);
 
@@ -133,130 +118,57 @@ static void *__dma_alloc(struct device *dev, size_t size,
        return coherent_ptr;
 
 no_map:
-       swiotlb_free(dev, size, ptr, *dma_handle, attrs);
+       dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs);
 no_mem:
        return NULL;
 }
 
-static void __dma_free(struct device *dev, size_t size,
-                      void *vaddr, dma_addr_t dma_handle,
-                      unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+               dma_addr_t dma_handle, unsigned long attrs)
 {
-       void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+       if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) {
+               void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
 
-       size = PAGE_ALIGN(size);
-
-       if (!is_device_dma_coherent(dev)) {
-               if (__free_from_pool(vaddr, size))
-                       return;
                vunmap(vaddr);
+               dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
        }
-       swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
 }
 
-static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
-                                    unsigned long offset, size_t size,
-                                    enum dma_data_direction dir,
-                                    unsigned long attrs)
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+               dma_addr_t dma_addr)
 {
-       dma_addr_t dev_addr;
-
-       dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
-       if (!is_device_dma_coherent(dev) &&
-           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-               __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
-
-       return dev_addr;
-}
-
-
-static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
-                                size_t size, enum dma_data_direction dir,
-                                unsigned long attrs)
-{
-       if (!is_device_dma_coherent(dev) &&
-           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-               __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
-       swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+       return __phys_to_pfn(dma_to_phys(dev, dma_addr));
 }
 
-static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
-                                 int nelems, enum dma_data_direction dir,
-                                 unsigned long attrs)
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+               unsigned long attrs)
 {
-       struct scatterlist *sg;
-       int i, ret;
-
-       ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
-       if (!is_device_dma_coherent(dev) &&
-           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-               for_each_sg(sgl, sg, ret, i)
-                       __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-                                      sg->length, dir);
-
-       return ret;
-}
-
-static void __swiotlb_unmap_sg_attrs(struct device *dev,
-                                    struct scatterlist *sgl, int nelems,
-                                    enum dma_data_direction dir,
-                                    unsigned long attrs)
-{
-       struct scatterlist *sg;
-       int i;
-
-       if (!is_device_dma_coherent(dev) &&
-           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-               for_each_sg(sgl, sg, nelems, i)
-                       __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-                                        sg->length, dir);
-       swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+       if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE))
+               return pgprot_writecombine(prot);
+       return prot;
 }
 
-static void __swiotlb_sync_single_for_cpu(struct device *dev,
-                                         dma_addr_t dev_addr, size_t size,
-                                         enum dma_data_direction dir)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+               size_t size, enum dma_data_direction dir)
 {
-       if (!is_device_dma_coherent(dev))
-               __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
-       swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
+       __dma_map_area(phys_to_virt(paddr), size, dir);
 }
 
-static void __swiotlb_sync_single_for_device(struct device *dev,
-                                            dma_addr_t dev_addr, size_t size,
-                                            enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+               size_t size, enum dma_data_direction dir)
 {
-       swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
-       if (!is_device_dma_coherent(dev))
-               __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+       __dma_unmap_area(phys_to_virt(paddr), size, dir);
 }
 
-static void __swiotlb_sync_sg_for_cpu(struct device *dev,
-                                     struct scatterlist *sgl, int nelems,
-                                     enum dma_data_direction dir)
+static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
+                                     struct page *page, size_t size)
 {
-       struct scatterlist *sg;
-       int i;
-
-       if (!is_device_dma_coherent(dev))
-               for_each_sg(sgl, sg, nelems, i)
-                       __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-                                        sg->length, dir);
-       swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
-}
+       int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
 
-static void __swiotlb_sync_sg_for_device(struct device *dev,
-                                        struct scatterlist *sgl, int nelems,
-                                        enum dma_data_direction dir)
-{
-       struct scatterlist *sg;
-       int i;
+       if (!ret)
+               sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
 
-       swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
-       if (!is_device_dma_coherent(dev))
-               for_each_sg(sgl, sg, nelems, i)
-                       __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-                                      sg->length, dir);
+       return ret;
 }
 
 static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
@@ -277,74 +189,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
        return ret;
 }
 
-static int __swiotlb_mmap(struct device *dev,
-                         struct vm_area_struct *vma,
-                         void *cpu_addr, dma_addr_t dma_addr, size_t size,
-                         unsigned long attrs)
-{
-       int ret;
-       unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
-
-       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
-                                            is_device_dma_coherent(dev));
-
-       if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
-               return ret;
-
-       return __swiotlb_mmap_pfn(vma, pfn, size);
-}
-
-static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
-                                     struct page *page, size_t size)
-{
-       int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
-
-       if (!ret)
-               sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
-
-       return ret;
-}
-
-static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
-                                void *cpu_addr, dma_addr_t handle, size_t size,
-                                unsigned long attrs)
-{
-       struct page *page = phys_to_page(dma_to_phys(dev, handle));
-
-       return __swiotlb_get_sgtable_page(sgt, page, size);
-}
-
-static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
-{
-       if (swiotlb)
-               return swiotlb_dma_supported(hwdev, mask);
-       return 1;
-}
-
-static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
-{
-       if (swiotlb)
-               return swiotlb_dma_mapping_error(hwdev, addr);
-       return 0;
-}
-
-static const struct dma_map_ops arm64_swiotlb_dma_ops = {
-       .alloc = __dma_alloc,
-       .free = __dma_free,
-       .mmap = __swiotlb_mmap,
-       .get_sgtable = __swiotlb_get_sgtable,
-       .map_page = __swiotlb_map_page,
-       .unmap_page = __swiotlb_unmap_page,
-       .map_sg = __swiotlb_map_sg_attrs,
-       .unmap_sg = __swiotlb_unmap_sg_attrs,
-       .sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
-       .sync_single_for_device = __swiotlb_sync_single_for_device,
-       .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
-       .sync_sg_for_device = __swiotlb_sync_sg_for_device,
-       .dma_supported = __swiotlb_dma_supported,
-       .mapping_error = __swiotlb_dma_mapping_error,
-};
-
 static int __init atomic_pool_init(void)
 {
        pgprot_t prot = __pgprot(PROT_NORMAL_NC);
@@ -500,10 +344,6 @@ EXPORT_SYMBOL(dummy_dma_ops);
 
 static int __init arm64_dma_init(void)
 {
-       if (swiotlb_force == SWIOTLB_FORCE ||
-           max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
-               swiotlb = 1;
-
        WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
                   TAINT_CPU_OUT_OF_SPEC,
                   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
@@ -528,7 +368,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
                                 dma_addr_t *handle, gfp_t gfp,
                                 unsigned long attrs)
 {
-       bool coherent = is_device_dma_coherent(dev);
+       bool coherent = dev_is_dma_coherent(dev);
        int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
        size_t iosize = size;
        void *addr;
@@ -569,7 +409,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
                        addr = NULL;
                }
        } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
-               pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+               pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
                struct page *page;
 
                page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
@@ -596,7 +436,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
                                                    size >> PAGE_SHIFT);
                }
        } else {
-               pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+               pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
                struct page **pages;
 
                pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
@@ -658,8 +498,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
        struct vm_struct *area;
        int ret;
 
-       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
-                                            is_device_dma_coherent(dev));
+       vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
 
        if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
                return ret;
@@ -709,11 +548,11 @@ static void __iommu_sync_single_for_cpu(struct device *dev,
 {
        phys_addr_t phys;
 
-       if (is_device_dma_coherent(dev))
+       if (dev_is_dma_coherent(dev))
                return;
 
        phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
-       __dma_unmap_area(phys_to_virt(phys), size, dir);
+       arch_sync_dma_for_cpu(dev, phys, size, dir);
 }
 
 static void __iommu_sync_single_for_device(struct device *dev,
@@ -722,11 +561,11 @@ static void __iommu_sync_single_for_device(struct device *dev,
 {
        phys_addr_t phys;
 
-       if (is_device_dma_coherent(dev))
+       if (dev_is_dma_coherent(dev))
                return;
 
        phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
-       __dma_map_area(phys_to_virt(phys), size, dir);
+       arch_sync_dma_for_device(dev, phys, size, dir);
 }
 
 static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
@@ -734,7 +573,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
                                   enum dma_data_direction dir,
                                   unsigned long attrs)
 {
-       bool coherent = is_device_dma_coherent(dev);
+       bool coherent = dev_is_dma_coherent(dev);
        int prot = dma_info_to_prot(dir, coherent, attrs);
        dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
 
@@ -762,11 +601,11 @@ static void __iommu_sync_sg_for_cpu(struct device *dev,
        struct scatterlist *sg;
        int i;
 
-       if (is_device_dma_coherent(dev))
+       if (dev_is_dma_coherent(dev))
                return;
 
        for_each_sg(sgl, sg, nelems, i)
-               __dma_unmap_area(sg_virt(sg), sg->length, dir);
+               arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
 }
 
 static void __iommu_sync_sg_for_device(struct device *dev,
@@ -776,18 +615,18 @@ static void __iommu_sync_sg_for_device(struct device *dev,
        struct scatterlist *sg;
        int i;
 
-       if (is_device_dma_coherent(dev))
+       if (dev_is_dma_coherent(dev))
                return;
 
        for_each_sg(sgl, sg, nelems, i)
-               __dma_map_area(sg_virt(sg), sg->length, dir);
+               arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
 }
 
 static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
                                int nelems, enum dma_data_direction dir,
                                unsigned long attrs)
 {
-       bool coherent = is_device_dma_coherent(dev);
+       bool coherent = dev_is_dma_coherent(dev);
 
        if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                __iommu_sync_sg_for_device(dev, sgl, nelems, dir);
@@ -879,9 +718,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                        const struct iommu_ops *iommu, bool coherent)
 {
        if (!dev->dma_ops)
-               dev->dma_ops = &arm64_swiotlb_dma_ops;
+               dev->dma_ops = &swiotlb_dma_ops;
 
-       dev->archdata.dma_coherent = coherent;
+       dev->dma_coherent = coherent;
        __iommu_setup_dma_ops(dev, dma_base, size, iommu);
 
 #ifdef CONFIG_XEN
index 88f3963ca30f685aead454338a2467a223dbc6ae..5fc335f4d9cd0f561dfa3a947e2502e7de88e658 100644 (file)
@@ -11,7 +11,7 @@
  *
  */
 
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/of_platform.h>
@@ -59,7 +59,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = {
        .sync_single_for_device = swiotlb_sync_single_for_device,
        .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = swiotlb_sync_sg_for_device,
-       .mapping_error = swiotlb_dma_mapping_error,
+       .mapping_error = dma_direct_mapping_error,
        .get_required_mask = swiotlb_powerpc_get_required,
 };
 
index fbca184ff5a0ac2f5f4dbdb5ed9d4cb6359a8fb0..bd73e7a9141076389ad638cc8fe4f48515331711 100644 (file)
@@ -5,6 +5,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/mem_encrypt.h>
 
+#define DIRECT_MAPPING_ERROR           0
+
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include <asm/dma-direct.h>
 #else
index 965be92c33b56a7a77e792961a3794cf3293465e..a387b59640a4b0d7a6edea41ad58205d74042d89 100644 (file)
@@ -67,11 +67,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
 
 /* Accessory functions. */
 
-void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle,
-               gfp_t flags, unsigned long attrs);
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_addr, unsigned long attrs);
-
 extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
                                   unsigned long offset, size_t size,
                                   enum dma_data_direction dir,
@@ -106,9 +101,6 @@ extern void
 swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
                           int nelems, enum dma_data_direction dir);
 
-extern int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
-
 extern int
 swiotlb_dma_supported(struct device *hwdev, u64 mask);
 
@@ -121,7 +113,6 @@ static inline unsigned int swiotlb_max_segment(void) { return 0; }
 #endif
 
 extern void swiotlb_print_info(void);
-extern int is_swiotlb_buffer(phys_addr_t paddr);
 extern void swiotlb_set_max_segment(unsigned int);
 
 extern const struct dma_map_ops swiotlb_dma_ops;
index 87a6bc2a96c0c46ac422fbf77f6ca63803752c74..f14c376937e5708d9b0d980130866631012dde8e 100644 (file)
@@ -14,8 +14,6 @@
 #include <linux/pfn.h>
 #include <linux/set_memory.h>
 
-#define DIRECT_MAPPING_ERROR           0
-
 /*
  * Most architectures use ZONE_DMA for the first 16 Megabytes, but
  * some use it for entirely different regions:
index 4f8a6dbf0b60973c875ee8b777be8b0a274c8907..ebecaf255ea29ed204586101e37d4baa9c626893 100644 (file)
@@ -21,6 +21,7 @@
 
 #include <linux/cache.h>
 #include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
@@ -72,13 +73,6 @@ static phys_addr_t io_tlb_start, io_tlb_end;
  */
 static unsigned long io_tlb_nslabs;
 
-/*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-static phys_addr_t io_tlb_overflow_buffer;
-
 /*
  * This is a free list describing the number of free entries available from
  * each index
@@ -126,7 +120,6 @@ setup_io_tlb_npages(char *str)
        return 0;
 }
 early_param("swiotlb", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
 
 unsigned long swiotlb_nr_tbl(void)
 {
@@ -194,16 +187,10 @@ void __init swiotlb_update_mem_attributes(void)
        bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
        set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
        memset(vaddr, 0, bytes);
-
-       vaddr = phys_to_virt(io_tlb_overflow_buffer);
-       bytes = PAGE_ALIGN(io_tlb_overflow);
-       set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-       memset(vaddr, 0, bytes);
 }
 
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
-       void *v_overflow_buffer;
        unsigned long i, bytes;
 
        bytes = nslabs << IO_TLB_SHIFT;
@@ -212,17 +199,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
        io_tlb_start = __pa(tlb);
        io_tlb_end = io_tlb_start + bytes;
 
-       /*
-        * Get the overflow emergency buffer
-        */
-       v_overflow_buffer = memblock_virt_alloc_low_nopanic(
-                                               PAGE_ALIGN(io_tlb_overflow),
-                                               PAGE_SIZE);
-       if (!v_overflow_buffer)
-               return -ENOMEM;
-
-       io_tlb_overflow_buffer = __pa(v_overflow_buffer);
-
        /*
         * Allocate and initialize the free list array.  This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
@@ -330,7 +306,6 @@ int
 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
        unsigned long i, bytes;
-       unsigned char *v_overflow_buffer;
 
        bytes = nslabs << IO_TLB_SHIFT;
 
@@ -341,19 +316,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
        set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
        memset(tlb, 0, bytes);
 
-       /*
-        * Get the overflow emergency buffer
-        */
-       v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-                                                    get_order(io_tlb_overflow));
-       if (!v_overflow_buffer)
-               goto cleanup2;
-
-       set_memory_decrypted((unsigned long)v_overflow_buffer,
-                       io_tlb_overflow >> PAGE_SHIFT);
-       memset(v_overflow_buffer, 0, io_tlb_overflow);
-       io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
-
        /*
         * Allocate and initialize the free list array.  This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
@@ -390,10 +352,6 @@ cleanup4:
                                                         sizeof(int)));
        io_tlb_list = NULL;
 cleanup3:
-       free_pages((unsigned long)v_overflow_buffer,
-                  get_order(io_tlb_overflow));
-       io_tlb_overflow_buffer = 0;
-cleanup2:
        io_tlb_end = 0;
        io_tlb_start = 0;
        io_tlb_nslabs = 0;
@@ -407,8 +365,6 @@ void __init swiotlb_exit(void)
                return;
 
        if (late_alloc) {
-               free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
-                          get_order(io_tlb_overflow));
                free_pages((unsigned long)io_tlb_orig_addr,
                           get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
                free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
@@ -416,8 +372,6 @@ void __init swiotlb_exit(void)
                free_pages((unsigned long)phys_to_virt(io_tlb_start),
                           get_order(io_tlb_nslabs << IO_TLB_SHIFT));
        } else {
-               memblock_free_late(io_tlb_overflow_buffer,
-                                  PAGE_ALIGN(io_tlb_overflow));
                memblock_free_late(__pa(io_tlb_orig_addr),
                                   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
                memblock_free_late(__pa(io_tlb_list),
@@ -429,7 +383,7 @@ void __init swiotlb_exit(void)
        max_segment = 0;
 }
 
-int is_swiotlb_buffer(phys_addr_t paddr)
+static int is_swiotlb_buffer(phys_addr_t paddr)
 {
        return paddr >= io_tlb_start && paddr < io_tlb_end;
 }
@@ -590,26 +544,6 @@ found:
        return tlb_addr;
 }
 
-/*
- * Allocates bounce buffer and returns its physical address.
- */
-static phys_addr_t
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-          enum dma_data_direction dir, unsigned long attrs)
-{
-       dma_addr_t start_dma_addr;
-
-       if (swiotlb_force == SWIOTLB_NO_FORCE) {
-               dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
-                                    &phys);
-               return SWIOTLB_MAP_ERROR;
-       }
-
-       start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
-       return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
-                                     dir, attrs);
-}
-
 /*
  * tlb_addr is the physical address of the bounce buffer to unmap.
  */
@@ -689,104 +623,32 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
        }
 }
 
-static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
-               size_t size)
-{
-       u64 mask = DMA_BIT_MASK(32);
-
-       if (dev && dev->coherent_dma_mask)
-               mask = dev->coherent_dma_mask;
-       return addr + size - 1 <= mask;
-}
-
-static void *
-swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               unsigned long attrs)
+static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys,
+               size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-       phys_addr_t phys_addr;
-
-       if (swiotlb_force == SWIOTLB_NO_FORCE)
-               goto out_warn;
-
-       phys_addr = swiotlb_tbl_map_single(dev,
-                       __phys_to_dma(dev, io_tlb_start),
-                       0, size, DMA_FROM_DEVICE, attrs);
-       if (phys_addr == SWIOTLB_MAP_ERROR)
-               goto out_warn;
-
-       *dma_handle = __phys_to_dma(dev, phys_addr);
-       if (!dma_coherent_ok(dev, *dma_handle, size))
-               goto out_unmap;
-
-       memset(phys_to_virt(phys_addr), 0, size);
-       return phys_to_virt(phys_addr);
+       dma_addr_t dma_addr;
 
-out_unmap:
-       dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-               (unsigned long long)dev->coherent_dma_mask,
-               (unsigned long long)*dma_handle);
-
-       /*
-        * DMA_TO_DEVICE to avoid memcpy in unmap_single.
-        * DMA_ATTR_SKIP_CPU_SYNC is optional.
-        */
-       swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-                       DMA_ATTR_SKIP_CPU_SYNC);
-out_warn:
-       if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
-               dev_warn(dev,
-                       "swiotlb: coherent allocation failed, size=%zu\n",
-                       size);
-               dump_stack();
+       if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) {
+               dev_warn_ratelimited(dev,
+                       "Cannot do DMA to address %pa\n", phys);
+               return DIRECT_MAPPING_ERROR;
        }
-       return NULL;
-}
-
-static bool swiotlb_free_buffer(struct device *dev, size_t size,
-               dma_addr_t dma_addr)
-{
-       phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
 
-       WARN_ON_ONCE(irqs_disabled());
-
-       if (!is_swiotlb_buffer(phys_addr))
-               return false;
-
-       /*
-        * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
-        * DMA_ATTR_SKIP_CPU_SYNC is optional.
-        */
-       swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-                                DMA_ATTR_SKIP_CPU_SYNC);
-       return true;
-}
-
-static void
-swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
-            int do_panic)
-{
-       if (swiotlb_force == SWIOTLB_NO_FORCE)
-               return;
-
-       /*
-        * Ran out of IOMMU space for this operation. This is very bad.
-        * Unfortunately the drivers cannot handle this operation properly.
-        * unless they check for dma_mapping_error (most don't)
-        * When the mapping is small enough return a static buffer to limit
-        * the damage, or panic when the transfer is too big.
-        */
-       dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
-                           size);
+       /* Oh well, have to allocate and map a bounce buffer. */
+       *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
+                       *phys, size, dir, attrs);
+       if (*phys == SWIOTLB_MAP_ERROR)
+               return DIRECT_MAPPING_ERROR;
 
-       if (size <= io_tlb_overflow || !do_panic)
-               return;
+       /* Ensure that the address returned is DMA'ble */
+       dma_addr = __phys_to_dma(dev, *phys);
+       if (unlikely(!dma_capable(dev, dma_addr, size))) {
+               swiotlb_tbl_unmap_single(dev, *phys, size, dir,
+                       attrs | DMA_ATTR_SKIP_CPU_SYNC);
+               return DIRECT_MAPPING_ERROR;
+       }
 
-       if (dir == DMA_BIDIRECTIONAL)
-               panic("DMA: Random memory could be DMA accessed\n");
-       if (dir == DMA_FROM_DEVICE)
-               panic("DMA: Random memory could be DMA written\n");
-       if (dir == DMA_TO_DEVICE)
-               panic("DMA: Random memory could be DMA read\n");
+       return dma_addr;
 }
 
 /*
@@ -801,7 +663,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
                            enum dma_data_direction dir,
                            unsigned long attrs)
 {
-       phys_addr_t map, phys = page_to_phys(page) + offset;
+       phys_addr_t phys = page_to_phys(page) + offset;
        dma_addr_t dev_addr = phys_to_dma(dev, phys);
 
        BUG_ON(dir == DMA_NONE);
@@ -810,28 +672,17 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
-       if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
-               return dev_addr;
-
-       trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
-
-       /* Oh well, have to allocate and map a bounce buffer. */
-       map = map_single(dev, phys, size, dir, attrs);
-       if (map == SWIOTLB_MAP_ERROR) {
-               swiotlb_full(dev, size, dir, 1);
-               return __phys_to_dma(dev, io_tlb_overflow_buffer);
+       if (!dma_capable(dev, dev_addr, size) ||
+           swiotlb_force == SWIOTLB_FORCE) {
+               trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
+               dev_addr = swiotlb_bounce_page(dev, &phys, size, dir, attrs);
        }
 
-       dev_addr = __phys_to_dma(dev, map);
+       if (!dev_is_dma_coherent(dev) &&
+           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+               arch_sync_dma_for_device(dev, phys, size, dir);
 
-       /* Ensure that the address returned is DMA'ble */
-       if (dma_capable(dev, dev_addr, size))
-               return dev_addr;
-
-       attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-       swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
-
-       return __phys_to_dma(dev, io_tlb_overflow_buffer);
+       return dev_addr;
 }
 
 /*
@@ -842,14 +693,18 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
-static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-                        size_t size, enum dma_data_direction dir,
-                        unsigned long attrs)
+void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+                       size_t size, enum dma_data_direction dir,
+                       unsigned long attrs)
 {
        phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
        BUG_ON(dir == DMA_NONE);
 
+       if (!dev_is_dma_coherent(hwdev) &&
+           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+               arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
+
        if (is_swiotlb_buffer(paddr)) {
                swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
                return;
@@ -867,13 +722,6 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
        dma_mark_clean(phys_to_virt(paddr), size);
 }
 
-void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-                       size_t size, enum dma_data_direction dir,
-                       unsigned long attrs)
-{
-       unmap_single(hwdev, dev_addr, size, dir, attrs);
-}
-
 /*
  * Make physical memory consistent for a single streaming mode DMA translation
  * after a transfer.
@@ -893,15 +741,17 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 
        BUG_ON(dir == DMA_NONE);
 
-       if (is_swiotlb_buffer(paddr)) {
+       if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_CPU)
+               arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
+
+       if (is_swiotlb_buffer(paddr))
                swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
-               return;
-       }
 
-       if (dir != DMA_FROM_DEVICE)
-               return;
+       if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE)
+               arch_sync_dma_for_device(hwdev, paddr, size, dir);
 
-       dma_mark_clean(phys_to_virt(paddr), size);
+       if (!is_swiotlb_buffer(paddr) && dir == DMA_FROM_DEVICE)
+               dma_mark_clean(phys_to_virt(paddr), size);
 }
 
 void
@@ -925,48 +775,31 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
  * appropriate dma address and length.  They are obtained via
  * sg_dma_{address,length}(SG).
  *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
  * Device ownership issues as mentioned above for swiotlb_map_page are the
  * same here.
  */
 int
-swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
+swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems,
                     enum dma_data_direction dir, unsigned long attrs)
 {
        struct scatterlist *sg;
        int i;
 
-       BUG_ON(dir == DMA_NONE);
-
        for_each_sg(sgl, sg, nelems, i) {
-               phys_addr_t paddr = sg_phys(sg);
-               dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
-
-               if (swiotlb_force == SWIOTLB_FORCE ||
-                   !dma_capable(hwdev, dev_addr, sg->length)) {
-                       phys_addr_t map = map_single(hwdev, sg_phys(sg),
-                                                    sg->length, dir, attrs);
-                       if (map == SWIOTLB_MAP_ERROR) {
-                               /* Don't panic here, we expect map_sg users
-                                  to do proper error handling. */
-                               swiotlb_full(hwdev, sg->length, dir, 0);
-                               attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-                               swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
-                                                      attrs);
-                               sg_dma_len(sgl) = 0;
-                               return 0;
-                       }
-                       sg->dma_address = __phys_to_dma(hwdev, map);
-               } else
-                       sg->dma_address = dev_addr;
+               sg->dma_address = swiotlb_map_page(dev, sg_page(sg), sg->offset,
+                               sg->length, dir, attrs);
+               if (sg->dma_address == DIRECT_MAPPING_ERROR)
+                       goto out_error;
                sg_dma_len(sg) = sg->length;
        }
+
        return nelems;
+
+out_error:
+       swiotlb_unmap_sg_attrs(dev, sgl, i, dir,
+                       attrs | DMA_ATTR_SKIP_CPU_SYNC);
+       sg_dma_len(sgl) = 0;
+       return 0;
 }
 
 /*
@@ -984,7 +817,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
        BUG_ON(dir == DMA_NONE);
 
        for_each_sg(sgl, sg, nelems, i)
-               unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
+               swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), dir,
                             attrs);
 }
 
@@ -1022,12 +855,6 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
        swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
 }
 
-int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
-{
-       return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
-}
-
 /*
  * Return whether the given device DMA address mask can be supported
  * properly.  For example, if your device can only drive the low 24-bits
@@ -1040,39 +867,10 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask)
        return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
 }
 
-void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, unsigned long attrs)
-{
-       void *vaddr;
-
-       /* temporary workaround: */
-       if (gfp & __GFP_NOWARN)
-               attrs |= DMA_ATTR_NO_WARN;
-
-       /*
-        * Don't print a warning when the first allocation attempt fails.
-        * swiotlb_alloc_coherent() will print a warning when the DMA memory
-        * allocation ultimately failed.
-        */
-       gfp |= __GFP_NOWARN;
-
-       vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
-       if (!vaddr)
-               vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
-       return vaddr;
-}
-
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_addr, unsigned long attrs)
-{
-       if (!swiotlb_free_buffer(dev, size, dma_addr))
-               dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
 const struct dma_map_ops swiotlb_dma_ops = {
-       .mapping_error          = swiotlb_dma_mapping_error,
-       .alloc                  = swiotlb_alloc,
-       .free                   = swiotlb_free,
+       .mapping_error          = dma_direct_mapping_error,
+       .alloc                  = dma_direct_alloc,
+       .free                   = dma_direct_free,
        .sync_single_for_cpu    = swiotlb_sync_single_for_cpu,
        .sync_single_for_device = swiotlb_sync_single_for_device,
        .sync_sg_for_cpu        = swiotlb_sync_sg_for_cpu,