Merge tag 'dma-mapping-5.3' of git://git.infradead.org/users/hch/dma-mapping
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 12 Jul 2019 22:13:55 +0000 (15:13 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 12 Jul 2019 22:13:55 +0000 (15:13 -0700)
Pull dma-mapping updates from Christoph Hellwig:

 - move the USB special case that bounced DMA through a device BAR into
   the USB code instead of handling it in the common DMA code (Laurentiu
   Tudor and Fredrik Noring)

 - don't dip into the global CMA pool for single page allocations
   (Nicolin Chen)

 - fix a crash when allocating memory for the atomic pool failed during
   boot (Florian Fainelli)

 - move support for MIPS-style uncached segments to the common code and
   use that for MIPS and nios2 (me)

 - make support for DMA_ATTR_NON_CONSISTENT and
   DMA_ATTR_NO_KERNEL_MAPPING generic (me)

 - convert nds32 to the generic remapping allocator (me)
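
For reference, the combined effect of the uncached-segment and DMA_ATTR_*
items above on the common allocator: kernel/dma/direct.c (its hunks are
not shown in this excerpt) now handles those cases itself. A paraphrased
sketch of the allocation path, not the literal upstream hunk:

	page = dma_alloc_contiguous(dev, size, gfp);
	ret = page_address(page);
	memset(ret, 0, size);
	if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs)) {
		arch_dma_prep_coherent(page, size);	/* writeback + invalidate */
		ret = uncached_kernel_address(ret);	/* switch to uncached alias */
	}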

* tag 'dma-mapping-5.3' of git://git.infradead.org/users/hch/dma-mapping: (29 commits)
  dma-mapping: mark dma_alloc_need_uncached as __always_inline
  MIPS: only select ARCH_HAS_UNCACHED_SEGMENT for non-coherent platforms
  usb: host: Fix excessive alignment restriction for local memory allocations
  lib/genalloc.c: Add algorithm, align and zeroed family of DMA allocators
  nios2: use the generic uncached segment support in dma-direct
  nds32: use the generic remapping allocator for coherent DMA allocations
  arc: use the generic remapping allocator for coherent DMA allocations
  dma-direct: handle DMA_ATTR_NO_KERNEL_MAPPING in common code
  dma-direct: handle DMA_ATTR_NON_CONSISTENT in common code
  dma-mapping: add a dma_alloc_need_uncached helper
  openrisc: remove the partial DMA_ATTR_NON_CONSISTENT support
  arc: remove the partial DMA_ATTR_NON_CONSISTENT support
  arm-nommu: remove the partial DMA_ATTR_NON_CONSISTENT support
  ARM: dma-mapping: allow larger DMA mask than supported
  dma-mapping: truncate dma masks to what dma_addr_t can hold
  iommu/dma: Apply dma_{alloc,free}_contiguous functions
  dma-remap: Avoid de-referencing NULL atomic_pool
  MIPS: use the generic uncached segment support in dma-direct
  dma-direct: provide generic support for uncached kernel segments
  au1100fb: fix DMA API abuse
  ...

41 files changed:
arch/Kconfig
arch/arc/Kconfig
arch/arc/mm/dma.c
arch/arm/mm/dma-mapping-nommu.c
arch/arm/mm/dma-mapping.c
arch/mips/Kconfig
arch/mips/include/asm/page.h
arch/mips/jazz/jazzdma.c
arch/mips/mm/cache.c
arch/mips/mm/dma-noncoherent.c
arch/nds32/Kconfig
arch/nds32/kernel/dma.c
arch/nios2/Kconfig
arch/nios2/include/asm/page.h
arch/nios2/mm/dma-mapping.c
arch/openrisc/kernel/dma.c
arch/parisc/kernel/pci-dma.c
arch/xtensa/kernel/pci-dma.c
drivers/iommu/dma-iommu.c
drivers/usb/Kconfig
drivers/usb/core/buffer.c
drivers/usb/core/hcd.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/fotg210-hcd.c
drivers/usb/host/ohci-hcd.c
drivers/usb/host/ohci-mem.c
drivers/usb/host/ohci-sm501.c
drivers/usb/host/ohci-tmio.c
drivers/usb/host/ohci.h
drivers/usb/host/uhci-hcd.c
drivers/video/fbdev/au1100fb.c
drivers/video/fbdev/au1100fb.h
include/linux/dma-contiguous.h
include/linux/dma-noncoherent.h
include/linux/genalloc.h
include/linux/usb/hcd.h
kernel/dma/contiguous.c
kernel/dma/direct.c
kernel/dma/mapping.c
kernel/dma/remap.c
lib/genalloc.c

index c47b328eada033257e30c89a0f1678d030b5b95f..e8d19c3cb91f226adf2a6444e61eca4c75b406c7 100644 (file)
@@ -260,6 +260,14 @@ config ARCH_HAS_SET_MEMORY
 config ARCH_HAS_SET_DIRECT_MAP
        bool
 
+#
+# Select if arch has an uncached kernel segment and provides the
+# uncached_kernel_address / cached_kernel_address symbols to use it
+#
+config ARCH_HAS_UNCACHED_SEGMENT
+       select ARCH_HAS_DMA_PREP_COHERENT
+       bool
+
 # Select if arch init_task must go in the __init_task_data section
 config ARCH_TASK_STRUCT_ON_STACK
        bool
index 1c8137e7247b40526de5d703977e3cf7cbcbf148..8383155c8c824f486c2cadd7e46178ca627bfe5a 100644 (file)
@@ -7,6 +7,7 @@ config ARC
        def_bool y
        select ARC_TIMERS
        select ARCH_HAS_DMA_COHERENT_TO_PFN
+       select ARCH_HAS_DMA_PREP_COHERENT
        select ARCH_HAS_PTE_SPECIAL
        select ARCH_HAS_SETUP_DMA_OPS
        select ARCH_HAS_SYNC_DMA_FOR_CPU
@@ -16,6 +17,7 @@ config ARC
        select BUILDTIME_EXTABLE_SORT
        select CLONE_BACKWARDS
        select COMMON_CLK
+       select DMA_DIRECT_REMAP
        select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
        select GENERIC_CLOCKEVENTS
        select GENERIC_FIND_FIRST_BIT
index 0bf1468c35a38daa707546e40cfde9a6a376ae48..62c210e7ee4cdc5046b422d819a997e03134652f 100644 (file)
@@ -8,51 +8,15 @@
 #include <asm/cacheflush.h>
 
 /*
- * ARCH specific callbacks for generic noncoherent DMA ops (dma/noncoherent.c)
+ * ARCH specific callbacks for generic noncoherent DMA ops
  *  - hardware IOC not available (or "dma-coherent" not set for device in DT)
  *  - But still handle both coherent and non-coherent requests from caller
  *
  * For DMA coherent hardware (IOC) generic code suffices
  */
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, unsigned long attrs)
-{
-       unsigned long order = get_order(size);
-       struct page *page;
-       phys_addr_t paddr;
-       void *kvaddr;
-       bool need_coh = !(attrs & DMA_ATTR_NON_CONSISTENT);
-
-       /*
-        * __GFP_HIGHMEM flag is cleared by upper layer functions
-        * (in include/linux/dma-mapping.h) so we should never get a
-        * __GFP_HIGHMEM here.
-        */
-       BUG_ON(gfp & __GFP_HIGHMEM);
-
-       page = alloc_pages(gfp | __GFP_ZERO, order);
-       if (!page)
-               return NULL;
-
-       /* This is linear addr (0x8000_0000 based) */
-       paddr = page_to_phys(page);
-
-       *dma_handle = paddr;
-
-       /*
-        * A coherent buffer needs MMU mapping to enforce non-cachability.
-        * kvaddr is kernel Virtual address (0x7000_0000 based).
-        */
-       if (need_coh) {
-               kvaddr = ioremap_nocache(paddr, size);
-               if (kvaddr == NULL) {
-                       __free_pages(page, order);
-                       return NULL;
-               }
-       } else {
-               kvaddr = (void *)(u32)paddr;
-       }
 
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
        /*
         * Evict any existing L1 and/or L2 lines for the backing page
         * in case it was used earlier as a normal "cached" page.
@@ -63,28 +27,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
         * Currently flush_cache_vmap nukes the L1 cache completely which
         * will be optimized as a separate commit
         */
-       if (need_coh)
-               dma_cache_wback_inv(paddr, size);
-
-       return kvaddr;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_handle, unsigned long attrs)
-{
-       phys_addr_t paddr = dma_handle;
-       struct page *page = virt_to_page(paddr);
-
-       if (!(attrs & DMA_ATTR_NON_CONSISTENT))
-               iounmap((void __force __iomem *)vaddr);
-
-       __free_pages(page, get_order(size));
-}
-
-long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
-               dma_addr_t dma_addr)
-{
-       return __phys_to_pfn(dma_addr);
+       dma_cache_wback_inv(page_to_phys(page), size);
 }
 
 /*
@@ -161,3 +104,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
        dev_info(dev, "use %sncoherent DMA ops\n",
                 dev->dma_coherent ? "" : "non");
 }
+
+static int __init atomic_pool_init(void)
+{
+       return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
+}
+postcore_initcall(atomic_pool_init);
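
With DMA_DIRECT_REMAP selected, arc now only supplies the cache
maintenance hook above; remapping the buffer into vmalloc space is done
by the generic allocator, and non-blocking requests are served from the
pool seeded by atomic_pool_init(). A hypothetical driver call that would
hit that pool:

	/* GFP_ATOMIC: the remap allocator cannot vmap in atomic context,
	 * so the buffer is carved out of the pre-populated atomic pool. */
	buf = dma_alloc_coherent(dev, SZ_4K, &dma_handle, GFP_ATOMIC);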
index 1aea01ba12628463a0942a26d9cc740d9f0db081..52b82559d99b3d608b4347a4060b7e686311d72a 100644 (file)
@@ -35,18 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
                                 unsigned long attrs)
 
 {
-       void *ret;
-
-       /*
-        * Try generic allocator first if we are advertised that
-        * consistency is not required.
-        */
-
-       if (attrs & DMA_ATTR_NON_CONSISTENT)
-               return dma_direct_alloc_pages(dev, size, dma_handle, gfp,
-                               attrs);
-
-       ret = dma_alloc_from_global_coherent(size, dma_handle);
+       void *ret = dma_alloc_from_global_coherent(size, dma_handle);
 
        /*
         * dma_alloc_from_global_coherent() may fail because:
@@ -66,16 +55,9 @@ static void arm_nommu_dma_free(struct device *dev, size_t size,
                               void *cpu_addr, dma_addr_t dma_addr,
                               unsigned long attrs)
 {
-       if (attrs & DMA_ATTR_NON_CONSISTENT) {
-               dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
-       } else {
-               int ret = dma_release_from_global_coherent(get_order(size),
-                                                          cpu_addr);
-
-               WARN_ON_ONCE(ret == 0);
-       }
+       int ret = dma_release_from_global_coherent(get_order(size), cpu_addr);
 
-       return;
+       WARN_ON_ONCE(ret == 0);
 }
 
 static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
index 1fb5c0ca1ed8e8cf853732fb5b7a40d37ee2cd19..4789c60a86e34552411367282be7309f0d8f779a 100644 (file)
@@ -216,25 +216,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops);
 
 static int __dma_supported(struct device *dev, u64 mask, bool warn)
 {
-       unsigned long max_dma_pfn;
-
-       /*
-        * If the mask allows for more memory than we can address,
-        * and we actually have that much memory, then we must
-        * indicate that DMA to this device is not supported.
-        */
-       if (sizeof(mask) != sizeof(dma_addr_t) &&
-           mask > (dma_addr_t)~0 &&
-           dma_to_pfn(dev, ~0) < max_pfn - 1) {
-               if (warn) {
-                       dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n",
-                                mask);
-                       dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n");
-               }
-               return 0;
-       }
-
-       max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
+       unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
 
        /*
         * Translate the device's DMA mask to a PFN limit.  This
index 7957d3457156abdaf7fc03483ebde93124d04f65..d50fafd7bf3aed0fac0729312b2d3aecd905e0c4 100644 (file)
@@ -1121,6 +1121,7 @@ config DMA_NONCOHERENT
        bool
        select ARCH_HAS_DMA_MMAP_PGPROT
        select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+       select ARCH_HAS_UNCACHED_SEGMENT
        select NEED_DMA_MAP_STATE
        select ARCH_HAS_DMA_COHERENT_TO_PFN
        select DMA_NONCOHERENT_CACHE_SYNC
index a25643d258cb55be5830d262590f014f1d064d07..0ba4ce6e2bf3ae3b5bfa7d366ebd4f6723baf138 100644 (file)
@@ -258,9 +258,6 @@ extern bool __virt_addr_valid(const volatile void *kaddr);
         ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
         VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#define UNCAC_ADDR(addr)       (UNCAC_BASE + __pa(addr))
-#define CAC_ADDR(addr)         ((unsigned long)__va((addr) - UNCAC_BASE))
-
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
index bedb5047aff3de0c725b0f2029ed2e177277d727..1804dc9d8136fb72b50fdc88e22f11be96b191f0 100644 (file)
@@ -575,10 +575,6 @@ static void *jazz_dma_alloc(struct device *dev, size_t size,
                return NULL;
        }
 
-       if (!(attrs & DMA_ATTR_NON_CONSISTENT)) {
-               dma_cache_wback_inv((unsigned long)ret, size);
-               ret = (void *)UNCAC_ADDR(ret);
-       }
        return ret;
 }
 
@@ -586,8 +582,6 @@ static void jazz_dma_free(struct device *dev, size_t size, void *vaddr,
                dma_addr_t dma_handle, unsigned long attrs)
 {
        vdma_free(dma_handle);
-       if (!(attrs & DMA_ATTR_NON_CONSISTENT))
-               vaddr = (void *)CAC_ADDR((unsigned long)vaddr);
        dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs);
 }
 
index 3da216988672492a17cb1eec9910874d58476713..33b409391ddb6ba394da60a84c75f8a5d5091ad8 100644 (file)
@@ -62,8 +62,6 @@ void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size);
 void (*_dma_cache_wback)(unsigned long start, unsigned long size);
 void (*_dma_cache_inv)(unsigned long start, unsigned long size);
 
-EXPORT_SYMBOL(_dma_cache_wback_inv);
-
 #endif /* CONFIG_DMA_NONCOHERENT */
 
 /*
index f9549d2fbea3169e216381397df3e203b5aca325..ed56c6fa7be29e73348fef1da07fb0fb82470fbc 100644 (file)
@@ -44,33 +44,25 @@ static inline bool cpu_needs_post_dma_flush(struct device *dev)
        }
 }
 
-void *arch_dma_alloc(struct device *dev, size_t size,
-               dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
 {
-       void *ret;
-
-       ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
-       if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
-               dma_cache_wback_inv((unsigned long) ret, size);
-               ret = (void *)UNCAC_ADDR(ret);
-       }
+       dma_cache_wback_inv((unsigned long)page_address(page), size);
+}
 
-       return ret;
+void *uncached_kernel_address(void *addr)
+{
+       return (void *)(__pa(addr) + UNCAC_BASE);
 }
 
-void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
-               dma_addr_t dma_addr, unsigned long attrs)
+void *cached_kernel_address(void *addr)
 {
-       if (!(attrs & DMA_ATTR_NON_CONSISTENT))
-               cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr);
-       dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
+       return __va(addr) - UNCAC_BASE;
 }
 
 long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
                dma_addr_t dma_addr)
 {
-       unsigned long addr = CAC_ADDR((unsigned long)cpu_addr);
-       return page_to_pfn(virt_to_page((void *)addr));
+       return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr)));
 }
 
 pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
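
The two helpers are the old UNCAC_ADDR()/CAC_ADDR() macros in function
form: they hop between the cached (KSEG0) and uncached (KSEG1) aliases
of the same physical pages. An illustrative round trip, assuming a
classic MIPS32 layout (UNCAC_BASE 0xa0000000, PAGE_OFFSET 0x80000000):

	void *cached = __va(0x01000000);		  /* 0x81000000, KSEG0 */
	void *uncached = uncached_kernel_address(cached); /* 0xa1000000, KSEG1 */
	WARN_ON(cached_kernel_address(uncached) != cached);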
index fd0d0639454f03c575275a83c613c630153762ba..fbd68329737f4cdf476f126ed3d7e0b7534d7c30 100644 (file)
@@ -7,12 +7,14 @@
 config NDS32
        def_bool y
        select ARCH_32BIT_OFF_T
+       select ARCH_HAS_DMA_PREP_COHERENT
        select ARCH_HAS_SYNC_DMA_FOR_CPU
        select ARCH_HAS_SYNC_DMA_FOR_DEVICE
        select ARCH_WANT_FRAME_POINTERS if FTRACE
        select CLKSRC_MMIO
        select CLONE_BACKWARDS
        select COMMON_CLK
+       select DMA_DIRECT_REMAP
        select GENERIC_ATOMIC64
        select GENERIC_CPU_DEVICES
        select GENERIC_CLOCKEVENTS
index d0dbd4fe96454557ffd3b187780c079dc41f97de..490e3720d69442c7cdc0fe9a0706b5d23ab64e67 100644 (file)
 
 #include <linux/types.h>
 #include <linux/mm.h>
-#include <linux/string.h>
 #include <linux/dma-noncoherent.h>
-#include <linux/io.h>
 #include <linux/cache.h>
 #include <linux/highmem.h>
-#include <linux/slab.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/proc-fns.h>
 
-/*
- * This is the page table (2MB) covering uncached, DMA consistent allocations
- */
-static pte_t *consistent_pte;
-static DEFINE_RAW_SPINLOCK(consistent_lock);
-
-/*
- * VM region handling support.
- *
- * This should become something generic, handling VM region allocations for
- * vmalloc and similar (ioremap, module space, etc).
- *
- * I envisage vmalloc()'s supporting vm_struct becoming:
- *
- *  struct vm_struct {
- *    struct vm_region region;
- *    unsigned long    flags;
- *    struct page      **pages;
- *    unsigned int     nr_pages;
- *    unsigned long    phys_addr;
- *  };
- *
- * get_vm_area() would then call vm_region_alloc with an appropriate
- * struct vm_region head (eg):
- *
- *  struct vm_region vmalloc_head = {
- *     .vm_list        = LIST_HEAD_INIT(vmalloc_head.vm_list),
- *     .vm_start       = VMALLOC_START,
- *     .vm_end         = VMALLOC_END,
- *  };
- *
- * However, vmalloc_head.vm_start is variable (typically, it is dependent on
- * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
- * would have to initialise this each time prior to calling vm_region_alloc().
- */
-struct arch_vm_region {
-       struct list_head vm_list;
-       unsigned long vm_start;
-       unsigned long vm_end;
-       struct page *vm_pages;
-};
-
-static struct arch_vm_region consistent_head = {
-       .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
-       .vm_start = CONSISTENT_BASE,
-       .vm_end = CONSISTENT_END,
-};
-
-static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head,
-                                             size_t size, int gfp)
-{
-       unsigned long addr = head->vm_start, end = head->vm_end - size;
-       unsigned long flags;
-       struct arch_vm_region *c, *new;
-
-       new = kmalloc(sizeof(struct arch_vm_region), gfp);
-       if (!new)
-               goto out;
-
-       raw_spin_lock_irqsave(&consistent_lock, flags);
-
-       list_for_each_entry(c, &head->vm_list, vm_list) {
-               if ((addr + size) < addr)
-                       goto nospc;
-               if ((addr + size) <= c->vm_start)
-                       goto found;
-               addr = c->vm_end;
-               if (addr > end)
-                       goto nospc;
-       }
-
-found:
-       /*
-        * Insert this entry _before_ the one we found.
-        */
-       list_add_tail(&new->vm_list, &c->vm_list);
-       new->vm_start = addr;
-       new->vm_end = addr + size;
-
-       raw_spin_unlock_irqrestore(&consistent_lock, flags);
-       return new;
-
-nospc:
-       raw_spin_unlock_irqrestore(&consistent_lock, flags);
-       kfree(new);
-out:
-       return NULL;
-}
-
-static struct arch_vm_region *vm_region_find(struct arch_vm_region *head,
-                                            unsigned long addr)
-{
-       struct arch_vm_region *c;
-
-       list_for_each_entry(c, &head->vm_list, vm_list) {
-               if (c->vm_start == addr)
-                       goto out;
-       }
-       c = NULL;
-out:
-       return c;
-}
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-               gfp_t gfp, unsigned long attrs)
-{
-       struct page *page;
-       struct arch_vm_region *c;
-       unsigned long order;
-       u64 mask = ~0ULL, limit;
-       pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
-
-       if (!consistent_pte) {
-               pr_err("%s: not initialized\n", __func__);
-               dump_stack();
-               return NULL;
-       }
-
-       if (dev) {
-               mask = dev->coherent_dma_mask;
-
-               /*
-                * Sanity check the DMA mask - it must be non-zero, and
-                * must be able to be satisfied by a DMA allocation.
-                */
-               if (mask == 0) {
-                       dev_warn(dev, "coherent DMA mask is unset\n");
-                       goto no_page;
-               }
-
-       }
-
-       /*
-        * Sanity check the allocation size.
-        */
-       size = PAGE_ALIGN(size);
-       limit = (mask + 1) & ~mask;
-       if ((limit && size >= limit) ||
-           size >= (CONSISTENT_END - CONSISTENT_BASE)) {
-               pr_warn("coherent allocation too big "
-                       "(requested %#x mask %#llx)\n", size, mask);
-               goto no_page;
-       }
-
-       order = get_order(size);
-
-       if (mask != 0xffffffff)
-               gfp |= GFP_DMA;
-
-       page = alloc_pages(gfp, order);
-       if (!page)
-               goto no_page;
-
-       /*
-        * Invalidate any data that might be lurking in the
-        * kernel direct-mapped region for device DMA.
-        */
-       {
-               unsigned long kaddr = (unsigned long)page_address(page);
-               memset(page_address(page), 0, size);
-               cpu_dma_wbinval_range(kaddr, kaddr + size);
-       }
-
-       /*
-        * Allocate a virtual address in the consistent mapping region.
-        */
-       c = vm_region_alloc(&consistent_head, size,
-                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
-       if (c) {
-               pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
-               struct page *end = page + (1 << order);
-
-               c->vm_pages = page;
-
-               /*
-                * Set the "dma handle"
-                */
-               *handle = page_to_phys(page);
-
-               do {
-                       BUG_ON(!pte_none(*pte));
-
-                       /*
-                        * x86 does not mark the pages reserved...
-                        */
-                       SetPageReserved(page);
-                       set_pte(pte, mk_pte(page, prot));
-                       page++;
-                       pte++;
-               } while (size -= PAGE_SIZE);
-
-               /*
-                * Free the otherwise unused pages.
-                */
-               while (page < end) {
-                       __free_page(page);
-                       page++;
-               }
-
-               return (void *)c->vm_start;
-       }
-
-       if (page)
-               __free_pages(page, order);
-no_page:
-       *handle = ~0;
-       return NULL;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
-               dma_addr_t handle, unsigned long attrs)
-{
-       struct arch_vm_region *c;
-       unsigned long flags, addr;
-       pte_t *ptep;
-
-       size = PAGE_ALIGN(size);
-
-       raw_spin_lock_irqsave(&consistent_lock, flags);
-
-       c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
-       if (!c)
-               goto no_area;
-
-       if ((c->vm_end - c->vm_start) != size) {
-               pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
-                      __func__, c->vm_end - c->vm_start, size);
-               dump_stack();
-               size = c->vm_end - c->vm_start;
-       }
-
-       ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
-       addr = c->vm_start;
-       do {
-               pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
-               unsigned long pfn;
-
-               ptep++;
-               addr += PAGE_SIZE;
-
-               if (!pte_none(pte) && pte_present(pte)) {
-                       pfn = pte_pfn(pte);
-
-                       if (pfn_valid(pfn)) {
-                               struct page *page = pfn_to_page(pfn);
-
-                               /*
-                                * x86 does not mark the pages reserved...
-                                */
-                               ClearPageReserved(page);
-
-                               __free_page(page);
-                               continue;
-                       }
-               }
-
-               pr_crit("%s: bad page in kernel page table\n", __func__);
-       } while (size -= PAGE_SIZE);
-
-       flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
-       list_del(&c->vm_list);
-
-       raw_spin_unlock_irqrestore(&consistent_lock, flags);
-
-       kfree(c);
-       return;
-
-no_area:
-       raw_spin_unlock_irqrestore(&consistent_lock, flags);
-       pr_err("%s: trying to free invalid coherent area: %p\n",
-              __func__, cpu_addr);
-       dump_stack();
-}
-
-/*
- * Initialise the consistent memory allocation.
- */
-static int __init consistent_init(void)
-{
-       pgd_t *pgd;
-       pmd_t *pmd;
-       pte_t *pte;
-       int ret = 0;
-
-       do {
-               pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
-               pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
-               if (!pmd) {
-                       pr_err("%s: no pmd tables\n", __func__);
-                       ret = -ENOMEM;
-                       break;
-               }
-               /* The first level mapping may be created in somewhere.
-                * It's not necessary to warn here. */
-               /* WARN_ON(!pmd_none(*pmd)); */
-
-               pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
-               if (!pte) {
-                       ret = -ENOMEM;
-                       break;
-               }
-
-               consistent_pte = pte;
-       } while (0);
-
-       return ret;
-}
-
-core_initcall(consistent_init);
-
 static inline void cache_op(phys_addr_t paddr, size_t size,
                void (*fn)(unsigned long start, unsigned long end))
 {
@@ -389,3 +75,14 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
                BUG();
        }
 }
+
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
+       cache_op(page_to_phys(page), size, cpu_dma_wbinval_range);
+}
+
+static int __init atomic_pool_init(void)
+{
+       return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
+}
+postcore_initcall(atomic_pool_init);
index 26a9c760a98bbcc36d43f6599326addcfeace594..44b5da37e8bdc1b4fb123a0344f254b0c6ccf65a 100644 (file)
@@ -4,6 +4,7 @@ config NIOS2
        select ARCH_32BIT_OFF_T
        select ARCH_HAS_SYNC_DMA_FOR_CPU
        select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+       select ARCH_HAS_UNCACHED_SEGMENT
        select ARCH_NO_SWAP
        select TIMER_OF
        select GENERIC_ATOMIC64
index f1fbdc47bdafd64d2e5f30923790729c6bd0afc0..79fcac61f6efb559c552e85291876891b6da575a 100644 (file)
@@ -101,12 +101,6 @@ static inline bool pfn_valid(unsigned long pfn)
 # define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
                                 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-# define UNCAC_ADDR(addr)      \
-       ((void *)((unsigned)(addr) | CONFIG_NIOS2_IO_REGION_BASE))
-# define CAC_ADDR(addr)                \
-       ((void *)(((unsigned)(addr) & ~CONFIG_NIOS2_IO_REGION_BASE) |   \
-               CONFIG_NIOS2_KERNEL_REGION_BASE))
-
 #include <asm-generic/memory_model.h>
 
 #include <asm-generic/getorder.h>
index 4af9e5b5ba1c72c553fc4354b2342cbecc8f15f0..9cb238664584c6cef9b96809db406fed93beb2ec 100644 (file)
@@ -60,32 +60,28 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
        }
 }
 
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
 {
-       void *ret;
+       unsigned long start = (unsigned long)page_address(page);
 
-       /* optimized page clearing */
-       gfp |= __GFP_ZERO;
+       flush_dcache_range(start, start + size);
+}
 
-       if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
-               gfp |= GFP_DMA;
+void *uncached_kernel_address(void *ptr)
+{
+       unsigned long addr = (unsigned long)ptr;
 
-       ret = (void *) __get_free_pages(gfp, get_order(size));
-       if (ret != NULL) {
-               *dma_handle = virt_to_phys(ret);
-               flush_dcache_range((unsigned long) ret,
-                       (unsigned long) ret + size);
-               ret = UNCAC_ADDR(ret);
-       }
+       addr |= CONFIG_NIOS2_IO_REGION_BASE;
 
-       return ret;
+       return (void *)addr;
 }
 
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_handle, unsigned long attrs)
+void *cached_kernel_address(void *ptr)
 {
-       unsigned long addr = (unsigned long) CAC_ADDR((unsigned long) vaddr);
+       unsigned long addr = (unsigned long)ptr;
+
+       addr &= ~CONFIG_NIOS2_IO_REGION_BASE;
+       addr |= CONFIG_NIOS2_KERNEL_REGION_BASE;
 
-       free_pages(addr, get_order(size));
+       return (void *)addr;
 }
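
On nios2 the uncached alias is formed by OR-ing the I/O region bit into
a kernel address, and cleared again (with the kernel region base put
back) on the way out, so the two helpers invert each other.
Illustrative check:

	void *c = page_address(page);
	void *u = uncached_kernel_address(c);
	WARN_ON(cached_kernel_address(u) != c);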
index 43e340c4cd9c9b06e89ed82582881ea42e42afde..b41a79fcdbd93d749ebc9b4fef29078266c36dee 100644 (file)
@@ -94,15 +94,13 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
        va = (unsigned long)page;
 
-       if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) {
-               /*
-                * We need to iterate through the pages, clearing the dcache for
-                * them and setting the cache-inhibit bit.
-                */
-               if (walk_page_range(va, va + size, &walk)) {
-                       free_pages_exact(page, size);
-                       return NULL;
-               }
+       /*
+        * We need to iterate through the pages, clearing the dcache for
+        * them and setting the cache-inhibit bit.
+        */
+       if (walk_page_range(va, va + size, &walk)) {
+               free_pages_exact(page, size);
+               return NULL;
        }
 
        return (void *)va;
@@ -118,10 +116,8 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr,
                .mm = &init_mm
        };
 
-       if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) {
-               /* walk_page_range shouldn't be able to fail here */
-               WARN_ON(walk_page_range(va, va + size, &walk));
-       }
+       /* walk_page_range shouldn't be able to fail here */
+       WARN_ON(walk_page_range(va, va + size, &walk));
 
        free_pages_exact(vaddr, size);
 }
index 239162355b58c7d93a6d64951b470b2ee3a91e14..ca35d9a76e5062ea12e150310fdf3bda035e0893 100644 (file)
@@ -394,17 +394,20 @@ pcxl_dma_init(void)
 
 __initcall(pcxl_dma_init);
 
-static void *pcxl_dma_alloc(struct device *dev, size_t size,
-               dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size,
+               dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
        unsigned long vaddr;
        unsigned long paddr;
        int order;
 
+       if (boot_cpu_data.cpu_type != pcxl2 && boot_cpu_data.cpu_type != pcxl)
+               return NULL;
+
        order = get_order(size);
        size = 1 << (order + PAGE_SHIFT);
        vaddr = pcxl_alloc_range(size);
-       paddr = __get_free_pages(flag | __GFP_ZERO, order);
+       paddr = __get_free_pages(gfp | __GFP_ZERO, order);
        flush_kernel_dcache_range(paddr, size);
        paddr = __pa(paddr);
        map_uncached_pages(vaddr, size, paddr);
@@ -421,44 +424,19 @@ static void *pcxl_dma_alloc(struct device *dev, size_t size,
        return (void *)vaddr;
 }
 
-static void *pcx_dma_alloc(struct device *dev, size_t size,
-               dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
-{
-       void *addr;
-
-       if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0)
-               return NULL;
-
-       addr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size));
-       if (addr)
-               *dma_handle = (dma_addr_t)virt_to_phys(addr);
-
-       return addr;
-}
-
-void *arch_dma_alloc(struct device *dev, size_t size,
-               dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
-{
-
-       if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl)
-               return pcxl_dma_alloc(dev, size, dma_handle, gfp, attrs);
-       else
-               return pcx_dma_alloc(dev, size, dma_handle, gfp, attrs);
-}
-
 void arch_dma_free(struct device *dev, size_t size, void *vaddr,
                dma_addr_t dma_handle, unsigned long attrs)
 {
        int order = get_order(size);
 
-       if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) {
-               size = 1 << (order + PAGE_SHIFT);
-               unmap_uncached_pages((unsigned long)vaddr, size);
-               pcxl_free_range((unsigned long)vaddr, size);
+       WARN_ON_ONCE(boot_cpu_data.cpu_type != pcxl2 &&
+                    boot_cpu_data.cpu_type != pcxl);
 
-               vaddr = __va(dma_handle);
-       }
-       free_pages((unsigned long)vaddr, get_order(size));
+       size = 1 << (order + PAGE_SHIFT);
+       unmap_uncached_pages((unsigned long)vaddr, size);
+       pcxl_free_range((unsigned long)vaddr, size);
+
+       free_pages((unsigned long)__va(dma_handle), order);
 }
 
 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
index a87f8a308cc1bcc057fd05765a838867399b44f3..65f05776d827fecbae83a0552dac4324cd255847 100644 (file)
@@ -163,10 +163,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 
        *handle = phys_to_dma(dev, page_to_phys(page));
 
-       if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
-               return page;
-       }
-
 #ifdef CONFIG_MMU
        if (PageHighMem(page)) {
                void *p;
@@ -192,9 +188,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
        unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        struct page *page;
 
-       if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
-               page = vaddr;
-       } else if (platform_vaddr_uncached(vaddr)) {
+       if (platform_vaddr_uncached(vaddr)) {
                page = virt_to_page(platform_vaddr_to_cached(vaddr));
        } else {
 #ifdef CONFIG_MMU
index f802255219d3ac942f290ded71815b05d447de4b..a7f9c3edbcb299f83f8d4c6093c274a0bc5b3387 100644 (file)
@@ -951,8 +951,8 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
 
        if (pages)
                __iommu_dma_free_pages(pages, count);
-       if (page && !dma_release_from_contiguous(dev, page, count))
-               __free_pages(page, get_order(alloc_size));
+       if (page)
+               dma_free_contiguous(dev, page, alloc_size);
 }
 
 static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
@@ -970,12 +970,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
        struct page *page = NULL;
        void *cpu_addr;
 
-       if (gfpflags_allow_blocking(gfp))
-               page = dma_alloc_from_contiguous(dev, alloc_size >> PAGE_SHIFT,
-                                                get_order(alloc_size),
-                                                gfp & __GFP_NOWARN);
-       if (!page)
-               page = alloc_pages(gfp, get_order(alloc_size));
+       page = dma_alloc_contiguous(dev, alloc_size, gfp);
        if (!page)
                return NULL;
 
@@ -997,8 +992,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
        memset(cpu_addr, 0, alloc_size);
        return cpu_addr;
 out_free_pages:
-       if (!dma_release_from_contiguous(dev, page, alloc_size >> PAGE_SHIFT))
-               __free_pages(page, get_order(alloc_size));
+       dma_free_contiguous(dev, page, alloc_size);
        return NULL;
 }
 
index 94573fb6830403319e5afd0f10ff29f06c718057..6e59d370ef8104bd25794305146be6878219d6d2 100644 (file)
@@ -45,6 +45,7 @@ config USB_ARCH_HAS_HCD
 config USB
        tristate "Support for Host-side USB"
        depends on USB_ARCH_HAS_HCD
+       select GENERIC_ALLOCATOR
        select USB_COMMON
        select NLS  # for UTF-8 strings
        ---help---
index f641342cdec09491bb856f114123322bc0446cb6..1359b78a624e6b56dcbea6bcf19c3be1960b3848 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/io.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
+#include <linux/genalloc.h>
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
 
@@ -67,7 +68,7 @@ int hcd_buffer_create(struct usb_hcd *hcd)
 
        if (!IS_ENABLED(CONFIG_HAS_DMA) ||
            (!is_device_dma_capable(hcd->self.sysdev) &&
-            !(hcd->driver->flags & HCD_LOCAL_MEM)))
+            !hcd->localmem_pool))
                return 0;
 
        for (i = 0; i < HCD_BUFFER_POOLS; i++) {
@@ -124,10 +125,12 @@ void *hcd_buffer_alloc(
        if (size == 0)
                return NULL;
 
+       if (hcd->localmem_pool)
+               return gen_pool_dma_alloc(hcd->localmem_pool, size, dma);
+
        /* some USB hosts just use PIO */
        if (!IS_ENABLED(CONFIG_HAS_DMA) ||
-           (!is_device_dma_capable(bus->sysdev) &&
-            !(hcd->driver->flags & HCD_LOCAL_MEM))) {
+           !is_device_dma_capable(bus->sysdev)) {
                *dma = ~(dma_addr_t) 0;
                return kmalloc(size, mem_flags);
        }
@@ -152,9 +155,13 @@ void hcd_buffer_free(
        if (!addr)
                return;
 
+       if (hcd->localmem_pool) {
+               gen_pool_free(hcd->localmem_pool, (unsigned long)addr, size);
+               return;
+       }
+
        if (!IS_ENABLED(CONFIG_HAS_DMA) ||
-           (!is_device_dma_capable(bus->sysdev) &&
-            !(hcd->driver->flags & HCD_LOCAL_MEM))) {
+           !is_device_dma_capable(bus->sysdev)) {
                kfree(addr);
                return;
        }
index 94d22551fc1bf1bd9d1c6f99649bd70692c5fd3c..88533938ce19512aa5cdeaa2469721b39560d063 100644 (file)
@@ -29,6 +29,8 @@
 #include <linux/workqueue.h>
 #include <linux/pm_runtime.h>
 #include <linux/types.h>
+#include <linux/genalloc.h>
+#include <linux/io.h>
 
 #include <linux/phy/phy.h>
 #include <linux/usb.h>
@@ -1345,14 +1347,14 @@ EXPORT_SYMBOL_GPL(usb_hcd_unlink_urb_from_ep);
  * using regular system memory - like pci devices doing bus mastering.
  *
  * To support host controllers with limited dma capabilities we provide dma
- * bounce buffers. This feature can be enabled using the HCD_LOCAL_MEM flag.
+ * bounce buffers. This feature can be enabled by initializing
+ * hcd->localmem_pool using usb_hcd_setup_local_mem().
  * For this to work properly the host controller code must first use the
  * function dma_declare_coherent_memory() to point out which memory area
  * should be used for dma allocations.
  *
- * The HCD_LOCAL_MEM flag then tells the usb code to allocate all data for
- * dma using dma_alloc_coherent() which in turn allocates from the memory
- * area pointed out with dma_declare_coherent_memory().
+ * The initialized hcd->localmem_pool then tells the usb code to allocate all
+ * data for dma using the genalloc API.
  *
  * So, to summarize...
  *
@@ -1362,9 +1364,6 @@ EXPORT_SYMBOL_GPL(usb_hcd_unlink_urb_from_ep);
  *   (a) "normal" kernel memory is no good, and
  *   (b) there's not enough to share
  *
- * - The only *portable* hook for such stuff in the
- *   DMA framework is dma_declare_coherent_memory()
- *
  * - So we use that, even though the primary requirement
  *   is that the memory be "local" (hence addressable
  *   by that device), not "coherent".
@@ -1531,7 +1530,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
                                                urb->setup_dma))
                                return -EAGAIN;
                        urb->transfer_flags |= URB_SETUP_MAP_SINGLE;
-               } else if (hcd->driver->flags & HCD_LOCAL_MEM) {
+               } else if (hcd->localmem_pool) {
                        ret = hcd_alloc_coherent(
                                        urb->dev->bus, mem_flags,
                                        &urb->setup_dma,
@@ -1601,7 +1600,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
                                else
                                        urb->transfer_flags |= URB_DMA_MAP_SINGLE;
                        }
-               } else if (hcd->driver->flags & HCD_LOCAL_MEM) {
+               } else if (hcd->localmem_pool) {
                        ret = hcd_alloc_coherent(
                                        urb->dev->bus, mem_flags,
                                        &urb->transfer_dma,
@@ -3039,6 +3038,40 @@ usb_hcd_platform_shutdown(struct platform_device *dev)
 }
 EXPORT_SYMBOL_GPL(usb_hcd_platform_shutdown);
 
+int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr,
+                           dma_addr_t dma, size_t size)
+{
+       int err;
+       void *local_mem;
+
+       hcd->localmem_pool = devm_gen_pool_create(hcd->self.sysdev, 4,
+                                                 dev_to_node(hcd->self.sysdev),
+                                                 dev_name(hcd->self.sysdev));
+       if (IS_ERR(hcd->localmem_pool))
+               return PTR_ERR(hcd->localmem_pool);
+
+       local_mem = devm_memremap(hcd->self.sysdev, phys_addr,
+                                 size, MEMREMAP_WC);
+       if (IS_ERR(local_mem))
+               return PTR_ERR(local_mem);
+
+       /*
+        * Here we pass a dma_addr_t but the arg type is a phys_addr_t.
+        * It's not backed by system memory and thus there's no kernel mapping
+        * for it.
+        */
+       err = gen_pool_add_virt(hcd->localmem_pool, (unsigned long)local_mem,
+                               dma, size, dev_to_node(hcd->self.sysdev));
+       if (err < 0) {
+               dev_err(hcd->self.sysdev, "gen_pool_add_virt failed with %d\n",
+                       err);
+               return err;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(usb_hcd_setup_local_mem);
+
 /*-------------------------------------------------------------------------*/
 
 #if IS_ENABLED(CONFIG_USB_MON)
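
A host controller driver opts in by calling the new helper once at probe
time, before usb_add_hcd(). A sketch with an assumed "sram" resource
(the real conversions are in the ohci-sm501 and ohci-tmio hunks below):

	/* Hand the controller's local SRAM to the genalloc pool.  DMA
	 * handles returned later are offsets from the start of the SRAM. */
	retval = usb_hcd_setup_local_mem(hcd, sram->start,
					 sram->start - sram->parent->start,
					 resource_size(sram));
	if (retval < 0)
		goto err_put_hcd;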
index cdafa97f632d5493f4a81e1330fda9e975bb4ede..9da7e22848c98827c43863a19c7da8259c4f6aeb 100644 (file)
@@ -559,7 +559,7 @@ static int ehci_init(struct usb_hcd *hcd)
        ehci->command = temp;
 
        /* Accept arbitrarily long scatter-gather lists */
-       if (!(hcd->driver->flags & HCD_LOCAL_MEM))
+       if (!hcd->localmem_pool)
                hcd->self.sg_tablesize = ~0;
 
        /* Prepare for unlinking active QHs */
index e835a22b12af6a80b6dc093cc55d67df8858c256..77cc36efae9500ea94f7109f5d7abed19812275a 100644 (file)
@@ -4996,7 +4996,7 @@ static int hcd_fotg210_init(struct usb_hcd *hcd)
        fotg210->command = temp;
 
        /* Accept arbitrarily long scatter-gather lists */
-       if (!(hcd->driver->flags & HCD_LOCAL_MEM))
+       if (!hcd->localmem_pool)
                hcd->self.sg_tablesize = ~0;
        return 0;
 }
index 210181fd98d2e9d6e850662becf6ec08f7b901c2..b457fdaff29746a0e70794b06308289290b2baf9 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/dmapool.h>
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
+#include <linux/genalloc.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -447,7 +448,7 @@ static int ohci_init (struct ohci_hcd *ohci)
        struct usb_hcd *hcd = ohci_to_hcd(ohci);
 
        /* Accept arbitrarily long scatter-gather lists */
-       if (!(hcd->driver->flags & HCD_LOCAL_MEM))
+       if (!hcd->localmem_pool)
                hcd->self.sg_tablesize = ~0;
 
        if (distrust_firmware)
@@ -505,8 +506,15 @@ static int ohci_init (struct ohci_hcd *ohci)
        timer_setup(&ohci->io_watchdog, io_watchdog_func, 0);
        ohci->prev_frame_no = IO_WATCHDOG_OFF;
 
-       ohci->hcca = dma_alloc_coherent (hcd->self.controller,
-                       sizeof(*ohci->hcca), &ohci->hcca_dma, GFP_KERNEL);
+       if (hcd->localmem_pool)
+               ohci->hcca = gen_pool_dma_alloc_align(hcd->localmem_pool,
+                                               sizeof(*ohci->hcca),
+                                               &ohci->hcca_dma, 256);
+       else
+               ohci->hcca = dma_alloc_coherent(hcd->self.controller,
+                                               sizeof(*ohci->hcca),
+                                               &ohci->hcca_dma,
+                                               GFP_KERNEL);
        if (!ohci->hcca)
                return -ENOMEM;
 
@@ -990,9 +998,14 @@ static void ohci_stop (struct usb_hcd *hcd)
        remove_debug_files (ohci);
        ohci_mem_cleanup (ohci);
        if (ohci->hcca) {
-               dma_free_coherent (hcd->self.controller,
-                               sizeof *ohci->hcca,
-                               ohci->hcca, ohci->hcca_dma);
+               if (hcd->localmem_pool)
+                       gen_pool_free(hcd->localmem_pool,
+                                     (unsigned long)ohci->hcca,
+                                     sizeof(*ohci->hcca));
+               else
+                       dma_free_coherent(hcd->self.controller,
+                                         sizeof(*ohci->hcca),
+                                         ohci->hcca, ohci->hcca_dma);
                ohci->hcca = NULL;
                ohci->hcca_dma = 0;
        }
index 3965ac0341eb14bb35eb69ac0d9f708603b2bcbb..1425335c6bafdc0c30bd6f1c8efb8322f63df59e 100644 (file)
@@ -36,6 +36,13 @@ static void ohci_hcd_init (struct ohci_hcd *ohci)
 
 static int ohci_mem_init (struct ohci_hcd *ohci)
 {
+       /*
+        * HCs with local memory allocate from localmem_pool so there's
+        * no need to create the below dma pools.
+        */
+       if (ohci_to_hcd(ohci)->localmem_pool)
+               return 0;
+
        ohci->td_cache = dma_pool_create ("ohci_td",
                ohci_to_hcd(ohci)->self.controller,
                sizeof (struct td),
@@ -84,8 +91,13 @@ td_alloc (struct ohci_hcd *hc, gfp_t mem_flags)
 {
        dma_addr_t      dma;
        struct td       *td;
+       struct usb_hcd  *hcd = ohci_to_hcd(hc);
 
-       td = dma_pool_zalloc (hc->td_cache, mem_flags, &dma);
+       if (hcd->localmem_pool)
+               td = gen_pool_dma_zalloc_align(hcd->localmem_pool,
+                               sizeof(*td), &dma, 32);
+       else
+               td = dma_pool_zalloc(hc->td_cache, mem_flags, &dma);
        if (td) {
                /* in case hc fetches it, make it look dead */
                td->hwNextTD = cpu_to_hc32 (hc, dma);
@@ -99,6 +111,7 @@ static void
 td_free (struct ohci_hcd *hc, struct td *td)
 {
        struct td       **prev = &hc->td_hash [TD_HASH_FUNC (td->td_dma)];
+       struct usb_hcd  *hcd = ohci_to_hcd(hc);
 
        while (*prev && *prev != td)
                prev = &(*prev)->td_hash;
@@ -106,7 +119,12 @@ td_free (struct ohci_hcd *hc, struct td *td)
                *prev = td->td_hash;
        else if ((td->hwINFO & cpu_to_hc32(hc, TD_DONE)) != 0)
                ohci_dbg (hc, "no hash for td %p\n", td);
-       dma_pool_free (hc->td_cache, td, td->td_dma);
+
+       if (hcd->localmem_pool)
+               gen_pool_free(hcd->localmem_pool, (unsigned long)td,
+                             sizeof(*td));
+       else
+               dma_pool_free(hc->td_cache, td, td->td_dma);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -117,8 +135,13 @@ ed_alloc (struct ohci_hcd *hc, gfp_t mem_flags)
 {
        dma_addr_t      dma;
        struct ed       *ed;
+       struct usb_hcd  *hcd = ohci_to_hcd(hc);
 
-       ed = dma_pool_zalloc (hc->ed_cache, mem_flags, &dma);
+       if (hcd->localmem_pool)
+               ed = gen_pool_dma_zalloc_align(hcd->localmem_pool,
+                               sizeof(*ed), &dma, 16);
+       else
+               ed = dma_pool_zalloc(hc->ed_cache, mem_flags, &dma);
        if (ed) {
                INIT_LIST_HEAD (&ed->td_list);
                ed->dma = dma;
@@ -129,6 +152,12 @@ ed_alloc (struct ohci_hcd *hc, gfp_t mem_flags)
 static void
 ed_free (struct ohci_hcd *hc, struct ed *ed)
 {
-       dma_pool_free (hc->ed_cache, ed, ed->dma);
+       struct usb_hcd  *hcd = ohci_to_hcd(hc);
+
+       if (hcd->localmem_pool)
+               gen_pool_free(hcd->localmem_pool, (unsigned long)ed,
+                             sizeof(*ed));
+       else
+               dma_pool_free(hc->ed_cache, ed, ed->dma);
 }
 
index c26228c25f99df1f3f0ab60a18a6ee4e58e1276e..c158cda9e4b9bc509d8e7fdf63fc6846fe8e07b9 100644 (file)
@@ -49,7 +49,7 @@ static const struct hc_driver ohci_sm501_hc_driver = {
         * generic hardware linkage
         */
        .irq =                  ohci_irq,
-       .flags =                HCD_USB11 | HCD_MEMORY | HCD_LOCAL_MEM,
+       .flags =                HCD_USB11 | HCD_MEMORY,
 
        /*
         * basic lifecycle operations
@@ -110,40 +110,18 @@ static int ohci_hcd_sm501_drv_probe(struct platform_device *pdev)
                goto err0;
        }
 
-       /* The sm501 chip is equipped with local memory that may be used
-        * by on-chip devices such as the video controller and the usb host.
-        * This driver uses dma_declare_coherent_memory() to make sure
-        * usb allocations with dma_alloc_coherent() allocate from
-        * this local memory. The dma_handle returned by dma_alloc_coherent()
-        * will be an offset starting from 0 for the first local memory byte.
-        *
-        * So as long as data is allocated using dma_alloc_coherent() all is
-        * fine. This is however not always the case - buffers may be allocated
-        * using kmalloc() - so the usb core needs to be told that it must copy
-        * data into our local memory if the buffers happen to be placed in
-        * regular memory. The HCD_LOCAL_MEM flag does just that.
-        */
-
-       retval = dma_declare_coherent_memory(dev, mem->start,
-                                        mem->start - mem->parent->start,
-                                        resource_size(mem));
-       if (retval) {
-               dev_err(dev, "cannot declare coherent memory\n");
-               goto err1;
-       }
-
        /* allocate, reserve and remap resources for registers */
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (res == NULL) {
                dev_err(dev, "no resource definition for registers\n");
                retval = -ENOENT;
-               goto err2;
+               goto err1;
        }
 
        hcd = usb_create_hcd(driver, &pdev->dev, dev_name(&pdev->dev));
        if (!hcd) {
                retval = -ENOMEM;
-               goto err2;
+               goto err1;
        }
 
        hcd->rsrc_start = res->start;
@@ -164,6 +142,25 @@ static int ohci_hcd_sm501_drv_probe(struct platform_device *pdev)
 
        ohci_hcd_init(hcd_to_ohci(hcd));
 
+       /* The sm501 chip is equipped with local memory that may be used
+        * by on-chip devices such as the video controller and the usb host.
+        * This driver uses genalloc so that usb allocations with
+        * gen_pool_dma_alloc() allocate from this local memory. The dma_handle
+        * returned by gen_pool_dma_alloc() will be an offset starting from 0
+        * for the first local memory byte.
+        *
+        * So as long as data is allocated using gen_pool_dma_alloc() all is
+        * fine. This is however not always the case - buffers may be allocated
+        * using kmalloc() - so the usb core needs to be told that it must copy
+        * data into our local memory if the buffers happen to be placed in
+        * regular memory. A non-null hcd->localmem_pool initialized by
+        * the call to usb_hcd_setup_local_mem() below does just that.
+        */
+
+       retval = usb_hcd_setup_local_mem(hcd, mem->start,
+                                        mem->start - mem->parent->start,
+                                        resource_size(mem));
+       if (retval < 0)
+               goto err5;
        retval = usb_add_hcd(hcd, irq, IRQF_SHARED);
        if (retval)
                goto err5;
@@ -181,8 +178,6 @@ err4:
        release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
 err3:
        usb_put_hcd(hcd);
-err2:
-       dma_release_declared_memory(dev);
 err1:
        release_mem_region(mem->start, resource_size(mem));
 err0:
@@ -197,7 +192,6 @@ static int ohci_hcd_sm501_drv_remove(struct platform_device *pdev)
        usb_remove_hcd(hcd);
        release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
        usb_put_hcd(hcd);
-       dma_release_declared_memory(&pdev->dev);
        mem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
        if (mem)
                release_mem_region(mem->start, resource_size(mem));
index f88a0370659fe3f5fdae1d1c5440d592be4a2c21..d5a293a707b6fa7b102fdcd8978d92e14ab8202d 100644 (file)
@@ -153,7 +153,7 @@ static const struct hc_driver ohci_tmio_hc_driver = {
 
        /* generic hardware linkage */
        .irq =                  ohci_irq,
-       .flags =                HCD_USB11 | HCD_MEMORY | HCD_LOCAL_MEM,
+       .flags =                HCD_USB11 | HCD_MEMORY,
 
        /* basic lifecycle operations */
        .start =                ohci_tmio_start,
@@ -224,11 +224,6 @@ static int ohci_hcd_tmio_drv_probe(struct platform_device *dev)
                goto err_ioremap_regs;
        }
 
-       ret = dma_declare_coherent_memory(&dev->dev, sram->start, sram->start,
-                               resource_size(sram));
-       if (ret)
-               goto err_dma_declare;
-
        if (cell->enable) {
                ret = cell->enable(dev);
                if (ret)
@@ -239,6 +234,11 @@ static int ohci_hcd_tmio_drv_probe(struct platform_device *dev)
        ohci = hcd_to_ohci(hcd);
        ohci_hcd_init(ohci);
 
+       ret = usb_hcd_setup_local_mem(hcd, sram->start, sram->start,
+                                     resource_size(sram));
+       if (ret < 0)
+               goto err_enable;
+
        ret = usb_add_hcd(hcd, irq, 0);
        if (ret)
                goto err_add_hcd;
@@ -254,8 +254,6 @@ err_add_hcd:
        if (cell->disable)
                cell->disable(dev);
 err_enable:
-       dma_release_declared_memory(&dev->dev);
-err_dma_declare:
        iounmap(hcd->regs);
 err_ioremap_regs:
        iounmap(tmio->ccr);
@@ -276,7 +274,6 @@ static int ohci_hcd_tmio_drv_remove(struct platform_device *dev)
        tmio_stop_hc(dev);
        if (cell->disable)
                cell->disable(dev);
-       dma_release_declared_memory(&dev->dev);
        iounmap(hcd->regs);
        iounmap(tmio->ccr);
        usb_put_hcd(hcd);
index ef4813bfc5bf1c775162672563a9648bbbc1fc5a..b015b00774b217c2e42b27920e34c25485615ddc 100644 (file)
@@ -385,6 +385,8 @@ struct ohci_hcd {
 
        /*
         * memory management for queue data structures
+        *
+        * @td_cache and @ed_cache are %NULL if &usb_hcd.localmem_pool is used.
         */
        struct dma_pool         *td_cache;
        struct dma_pool         *ed_cache;
index 98deb5f642687af645f3e6bab09b86b1db416492..03bc59755123fda5177c29a3a66a02d450fcc114 100644 (file)
@@ -581,7 +581,7 @@ static int uhci_start(struct usb_hcd *hcd)
 
        hcd->uses_new_polling = 1;
        /* Accept arbitrarily long scatter-gather lists */
-       if (!(hcd->driver->flags & HCD_LOCAL_MEM))
+       if (!hcd->localmem_pool)
                hcd->self.sg_tablesize = ~0;
 
        spin_lock_init(&uhci->lock);
index 0adf0683cf081f8439935793ebd7ed24fa2cc8a3..99941ae1f3a1cb489bf01bd334b77146c03c7f59 100644 (file)
@@ -340,14 +340,12 @@ int au1100fb_fb_pan_display(struct fb_var_screeninfo *var, struct fb_info *fbi)
  */
 int au1100fb_fb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
 {
-       struct au1100fb_device *fbdev;
-
-       fbdev = to_au1100fb_device(fbi);
+       struct au1100fb_device *fbdev = to_au1100fb_device(fbi);
 
-       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        pgprot_val(vma->vm_page_prot) |= (6 << 9); //CCA=6
 
-       return vm_iomap_memory(vma, fbdev->fb_phys, fbdev->fb_len);
+       return dma_mmap_coherent(fbdev->dev, vma, fbdev->fb_mem, fbdev->fb_phys,
+                       fbdev->fb_len);
 }
 
 static struct fb_ops au1100fb_ops =
@@ -412,7 +410,6 @@ static int au1100fb_drv_probe(struct platform_device *dev)
 {
        struct au1100fb_device *fbdev;
        struct resource *regs_res;
-       unsigned long page;
        struct clk *c;
 
        /* Allocate new device private */
@@ -424,6 +421,7 @@ static int au1100fb_drv_probe(struct platform_device *dev)
                goto failed;
 
        platform_set_drvdata(dev, (void *)fbdev);
+       fbdev->dev = &dev->dev;
 
        /* Allocate region for our registers and map them */
        regs_res = platform_get_resource(dev, IORESOURCE_MEM, 0);
@@ -472,20 +470,6 @@ static int au1100fb_drv_probe(struct platform_device *dev)
        au1100fb_fix.smem_start = fbdev->fb_phys;
        au1100fb_fix.smem_len = fbdev->fb_len;
 
-       /*
-        * Set page reserved so that mmap will work. This is necessary
-        * since we'll be remapping normal memory.
-        */
-       for (page = (unsigned long)fbdev->fb_mem;
-            page < PAGE_ALIGN((unsigned long)fbdev->fb_mem + fbdev->fb_len);
-            page += PAGE_SIZE) {
-#ifdef CONFIG_DMA_NONCOHERENT
-               SetPageReserved(virt_to_page(CAC_ADDR((void *)page)));
-#else
-               SetPageReserved(virt_to_page(page));
-#endif
-       }
-
        print_dbg("Framebuffer memory map at %p", fbdev->fb_mem);
        print_dbg("phys=0x%08x, size=%dK", fbdev->fb_phys, fbdev->fb_len / 1024);
 
index 9af19939a9c636de2acd965c2b7fd7b247530c9d..e7239bceefd3ad347fba90fd0160f4b212b6378f 100644 (file)
@@ -110,6 +110,7 @@ struct au1100fb_device {
        dma_addr_t              fb_phys;
        int                     panel_idx;
        struct clk              *lcdclk;
+       struct device           *dev;
 };
 
 /********************************************************************/
index 6665fa03c0d1a851a9b838035d0780dfd018790f..c05d4e661489b3625a2e83874cc87d4a51edf875 100644 (file)
@@ -50,6 +50,7 @@
 #ifdef __KERNEL__
 
 #include <linux/device.h>
+#include <linux/mm.h>
 
 struct cma;
 struct page;
@@ -111,6 +112,8 @@ struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
                                       unsigned int order, bool no_warn);
 bool dma_release_from_contiguous(struct device *dev, struct page *pages,
                                 int count);
+struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp);
+void dma_free_contiguous(struct device *dev, struct page *page, size_t size);
 
 #else
 
@@ -153,6 +156,22 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
        return false;
 }
 
+/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */
+static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size,
+               gfp_t gfp)
+{
+       int node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
+       size_t align = get_order(PAGE_ALIGN(size));
+
+       return alloc_pages_node(node, gfp, align);
+}
+
+static inline void dma_free_contiguous(struct device *dev, struct page *page,
+               size_t size)
+{
+       __free_pages(page, get_order(size));
+}
+
 #endif
 
 #endif
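
For illustration, a minimal sketch of a caller using the new helpers; @dev and the size are assumed, and the CONFIG_DMA_CMA=n inline stubs above are what actually runs when CMA is not built in:

	/* Hypothetical driver snippet: allocate a physically contiguous
	 * buffer, served by CMA when available and by plain pages via
	 * the inline fallbacks above otherwise. */
	struct page *page = dma_alloc_contiguous(dev, 4 * PAGE_SIZE, GFP_KERNEL);

	if (!page)
		return -ENOMEM;
	/* ... hand page_to_phys(page) to the device ... */
	dma_free_contiguous(dev, page, 4 * PAGE_SIZE);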
index 9741767e400fbff4e94a56934187fb1920fd510a..3813211a9aadef0e2dc2f96267faf914cad2987d 100644 (file)
@@ -20,6 +20,22 @@ static inline bool dev_is_dma_coherent(struct device *dev)
 }
 #endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */
 
+/*
+ * Check if an allocation needs to be marked uncached to be coherent.
+ */
+static __always_inline bool dma_alloc_need_uncached(struct device *dev,
+               unsigned long attrs)
+{
+       if (dev_is_dma_coherent(dev))
+               return false;
+       if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
+               return false;
+       if (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) &&
+           (attrs & DMA_ATTR_NON_CONSISTENT))
+               return false;
+       return true;
+}
+
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
                gfp_t gfp, unsigned long attrs);
 void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
@@ -80,4 +96,7 @@ static inline void arch_dma_prep_coherent(struct page *page, size_t size)
 }
 #endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */
 
+void *uncached_kernel_address(void *addr);
+void *cached_kernel_address(void *addr);
+
 #endif /* _LINUX_DMA_NONCOHERENT_H */
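
The two hooks declared above are what an architecture with a fixed uncached alias must provide. A minimal sketch, assuming a MIPS32-like layout where KSEG0 (cached) and KSEG1 (uncached) alias the same physical memory; illustrative, not the exact implementation from this series:

	#include <asm/addrspace.h>

	/* Translate between the cached and uncached kernel aliases of
	 * the same physical pages. */
	void *uncached_kernel_address(void *addr)
	{
		return (void *)CKSEG1ADDR((unsigned long)addr);
	}

	void *cached_kernel_address(void *addr)
	{
		return (void *)CKSEG0ADDR((unsigned long)addr);
	}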
index 205f62b8d2916b8d3ad9c92ebdfd3ed00aee06fb..4bd583bd6934ea87e0b178b07f69a5c480d1fa48 100644 (file)
@@ -155,6 +155,15 @@ static inline unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
 
 extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size,
                dma_addr_t *dma);
+extern void *gen_pool_dma_alloc_algo(struct gen_pool *pool, size_t size,
+               dma_addr_t *dma, genpool_algo_t algo, void *data);
+extern void *gen_pool_dma_alloc_align(struct gen_pool *pool, size_t size,
+               dma_addr_t *dma, int align);
+extern void *gen_pool_dma_zalloc(struct gen_pool *pool, size_t size, dma_addr_t *dma);
+extern void *gen_pool_dma_zalloc_algo(struct gen_pool *pool, size_t size,
+               dma_addr_t *dma, genpool_algo_t algo, void *data);
+extern void *gen_pool_dma_zalloc_align(struct gen_pool *pool, size_t size,
+               dma_addr_t *dma, int align);
 extern void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr,
                size_t size, void **owner);
 static inline void gen_pool_free(struct gen_pool *pool, unsigned long addr,
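
A usage sketch for the new family (pool setup elided; @pool, the 256-byte size and the 32-byte alignment are illustrative):

	dma_addr_t dma;
	/* Zeroed, 32-byte aligned allocation from a driver-owned pool. */
	void *buf = gen_pool_dma_zalloc_align(pool, 256, &dma, 32);

	if (!buf)
		return -ENOMEM;
	/* ... program @dma into the device ... */
	gen_pool_free(pool, (unsigned long)buf, 256);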
index bb57b5af47002ff8d3d738651e22be1b98020a06..bab27ccc8ff564eb70d364746e1fc317c0fd3fb2 100644 (file)
@@ -216,6 +216,9 @@ struct usb_hcd {
 #define        HC_IS_RUNNING(state) ((state) & __ACTIVE)
 #define        HC_IS_SUSPENDED(state) ((state) & __SUSPEND)
 
+       /* memory pool for HCs having local memory, or %NULL */
+       struct gen_pool         *localmem_pool;
+
        /* more shared queuing code would be good; it should support
         * smarter scheduling, handle transaction translators, etc;
         * input size of periodic table to an interrupt scheduler.
@@ -253,7 +256,6 @@ struct hc_driver {
 
        int     flags;
 #define        HCD_MEMORY      0x0001          /* HC regs use memory (else I/O) */
-#define        HCD_LOCAL_MEM   0x0002          /* HC needs local memory */
 #define        HCD_SHARED      0x0004          /* Two (or more) usb_hcds share HW */
 #define        HCD_USB11       0x0010          /* USB 1.1 */
 #define        HCD_USB2        0x0020          /* USB 2.0 */
@@ -461,6 +463,8 @@ extern int usb_add_hcd(struct usb_hcd *hcd,
                unsigned int irqnum, unsigned long irqflags);
 extern void usb_remove_hcd(struct usb_hcd *hcd);
 extern int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1);
+int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr,
+                           dma_addr_t dma, size_t size);
 
 struct platform_device;
 extern void usb_hcd_platform_shutdown(struct platform_device *dev);
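
A hedged sketch of how a probe routine for a controller with device-local memory wires this up, loosely modeled on the ohci-sm501 conversion in this series; @hcd, @mem and the error label are assumed:

	/* @mem is the IORESOURCE_MEM resource covering the local SRAM;
	 * the dma argument is the device-side address of that memory,
	 * here an offset from the parent resource. */
	retval = usb_hcd_setup_local_mem(hcd, mem->start,
					 mem->start - mem->parent->start,
					 resource_size(mem));
	if (retval < 0)
		goto err;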
index b2a87905846db68ad65d62a6842893c35a1302fb..bfc0c17f2a3d411fcdee446f8aeeb0c1be0f681e 100644 (file)
@@ -214,6 +214,62 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
        return cma_release(dev_get_cma_area(dev), pages, count);
 }
 
+/**
+ * dma_alloc_contiguous() - allocate contiguous pages
+ * @dev:   Pointer to device for which the allocation is performed.
+ * @size:  Requested allocation size.
+ * @gfp:   Allocation flags.
+ *
+ * This function allocates a contiguous memory buffer for the specified
+ * device. It first tries the device-specific contiguous memory area if
+ * available, then the default global one, and finally falls back to
+ * allocating normal pages.
+ *
+ * Note that single-page allocations bypass the global area, since the
+ * addresses within one page are always contiguous; not dipping into CMA
+ * for those also helps reduce fragmentation.
+ */
+struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
+{
+       int node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
+       size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       size_t align = get_order(PAGE_ALIGN(size));
+       struct page *page = NULL;
+       struct cma *cma = NULL;
+
+       if (dev && dev->cma_area)
+               cma = dev->cma_area;
+       else if (count > 1)
+               cma = dma_contiguous_default_area;
+
+       /* CMA can be used only in a context that permits sleeping */
+       if (cma && gfpflags_allow_blocking(gfp)) {
+               align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT);
+               page = cma_alloc(cma, count, align, gfp & __GFP_NOWARN);
+       }
+
+       /* Fallback allocation of normal pages */
+       if (!page)
+               page = alloc_pages_node(node, gfp, align);
+       return page;
+}
+
+/**
+ * dma_free_contiguous() - release allocated pages
+ * @dev:   Pointer to device for which the pages were allocated.
+ * @page:  Pointer to the allocated pages.
+ * @size:  Size of allocated pages.
+ *
+ * This function releases memory allocated by dma_alloc_contiguous(). Since
+ * cma_release() returns false when the provided pages do not belong to a
+ * contiguous area, this function falls back to __free_pages() in that case.
+ */
+void dma_free_contiguous(struct device *dev, struct page *page, size_t size)
+{
+       if (!cma_release(dev_get_cma_area(dev), page, size >> PAGE_SHIFT))
+               __free_pages(page, get_order(size));
+}
+
 /*
  * Support for reserved memory regions defined in device tree
  */
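
To make the single-page bypass concrete (sizes illustrative, error handling elided):

	/* One page: always served by alloc_pages_node(), never global CMA. */
	struct page *one  = dma_alloc_contiguous(dev, PAGE_SIZE, GFP_KERNEL);
	/* Multiple pages: may come from the per-device or global CMA area. */
	struct page *many = dma_alloc_contiguous(dev, 8 * PAGE_SIZE, GFP_KERNEL);

	dma_free_contiguous(dev, many, 8 * PAGE_SIZE);
	dma_free_contiguous(dev, one, PAGE_SIZE);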
index 2c2772e9702ab4748e6ef6f18835cdfedc185310..b90e1aede74340942af8ba220a3bf5c7ac51ea27 100644 (file)
@@ -96,8 +96,6 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
-       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-       int page_order = get_order(size);
        struct page *page = NULL;
        u64 phys_mask;
 
@@ -109,20 +107,9 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
        gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
                        &phys_mask);
 again:
-       /* CMA can be used only in the context which permits sleeping */
-       if (gfpflags_allow_blocking(gfp)) {
-               page = dma_alloc_from_contiguous(dev, count, page_order,
-                                                gfp & __GFP_NOWARN);
-               if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
-                       dma_release_from_contiguous(dev, page, count);
-                       page = NULL;
-               }
-       }
-       if (!page)
-               page = alloc_pages_node(dev_to_node(dev), gfp, page_order);
-
+       page = dma_alloc_contiguous(dev, size, gfp);
        if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
-               __free_pages(page, page_order);
+               dma_free_contiguous(dev, page, size);
                page = NULL;
 
                if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
@@ -151,10 +138,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
        if (!page)
                return NULL;
 
+       if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+               /* remove any dirty cache lines on the kernel alias */
+               if (!PageHighMem(page))
+                       arch_dma_prep_coherent(page, size);
+               /* return the page pointer as the opaque cookie */
+               return page;
+       }
+
        if (PageHighMem(page)) {
                /*
                 * Depending on the cma= arguments and per-arch setup
-                * dma_alloc_from_contiguous could return highmem pages.
+                * dma_alloc_contiguous could return highmem pages.
                 * Without remapping there is no way to return them here,
                 * so log an error and fail.
                 */
@@ -171,15 +166,19 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
                *dma_handle = phys_to_dma(dev, page_to_phys(page));
        }
        memset(ret, 0, size);
+
+       if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
+           dma_alloc_need_uncached(dev, attrs)) {
+               arch_dma_prep_coherent(page, size);
+               ret = uncached_kernel_address(ret);
+       }
+
        return ret;
 }
 
 void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
 {
-       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-       if (!dma_release_from_contiguous(dev, page, count))
-               __free_pages(page, get_order(size));
+       dma_free_contiguous(dev, page, size);
 }
 
 void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
@@ -187,15 +186,26 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
 {
        unsigned int page_order = get_order(size);
 
+       if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+               /* cpu_addr is a struct page cookie, not a kernel address */
+               __dma_direct_free_pages(dev, size, cpu_addr);
+               return;
+       }
+
        if (force_dma_unencrypted())
                set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
+
+       if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
+           dma_alloc_need_uncached(dev, attrs))
+               cpu_addr = cached_kernel_address(cpu_addr);
        __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
 }
 
 void *dma_direct_alloc(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
-       if (!dev_is_dma_coherent(dev))
+       if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
+           dma_alloc_need_uncached(dev, attrs))
                return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
        return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
 }
@@ -203,7 +213,8 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 void dma_direct_free(struct device *dev, size_t size,
                void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
 {
-       if (!dev_is_dma_coherent(dev))
+       if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
+           dma_alloc_need_uncached(dev, attrs))
                arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
        else
                dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
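
With the attribute handling now in common code, a driver-side sketch of DMA_ATTR_NO_KERNEL_MAPPING through the generic dma_alloc_attrs()/dma_free_attrs() API (@dev and the size are illustrative):

	/* The return value is an opaque cookie (internally a struct page
	 * pointer), not a kernel virtual address: never dereference it. */
	void *cookie;
	dma_addr_t dma;

	cookie = dma_alloc_attrs(dev, SZ_64K, &dma, GFP_KERNEL,
				 DMA_ATTR_NO_KERNEL_MAPPING);
	if (!cookie)
		return -ENOMEM;
	/* ... the device DMAs to/from @dma; the CPU never touches it ... */
	dma_free_attrs(dev, SZ_64K, cookie, dma, DMA_ATTR_NO_KERNEL_MAPPING);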
index f7afdadb6770bb96b50cd4285b65b887813d2ea5..1f628e7ac7097ab40e577904a91f26fce28610b8 100644 (file)
@@ -317,6 +317,12 @@ void arch_dma_set_mask(struct device *dev, u64 mask);
 
 int dma_set_mask(struct device *dev, u64 mask)
 {
+       /*
+        * Truncate the mask to the actually supported dma_addr_t width to
+        * avoid generating unsupportable addresses.
+        */
+       mask = (dma_addr_t)mask;
+
        if (!dev->dma_mask || !dma_supported(dev, mask))
                return -EIO;
 
@@ -330,6 +336,12 @@ EXPORT_SYMBOL(dma_set_mask);
 #ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
 int dma_set_coherent_mask(struct device *dev, u64 mask)
 {
+       /*
+        * Truncate the mask to the actually supported dma_addr_t width to
+        * avoid generating unsupportable addresses.
+        */
+       mask = (dma_addr_t)mask;
+
        if (!dma_supported(dev, mask))
                return -EIO;
 
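
The effect of the truncation, as a sketch: on a kernel built with a 32-bit dma_addr_t, a 64-bit mask request is clamped before dma_supported() sees it, so a driver can ask for more than the kernel can address without failing spuriously:

	/* The 64-bit request is truncated to the dma_addr_t width
	 * internally, so this succeeds wherever 32-bit DMA works. */
	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
		dev_warn(dev, "no usable DMA configuration\n");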
index 7a723194ecbed71bec2db5265c932c2f84a8b123..a594aec07882d9a0ad4d482d00afb81e4daa3189 100644 (file)
@@ -158,6 +158,9 @@ out:
 
 bool dma_in_atomic_pool(void *start, size_t size)
 {
+       if (unlikely(!atomic_pool))
+               return false;
+
        return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
 }
 
@@ -199,8 +202,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
        size = PAGE_ALIGN(size);
 
-       if (!gfpflags_allow_blocking(flags) &&
-           !(attrs & DMA_ATTR_NO_KERNEL_MAPPING)) {
+       if (!gfpflags_allow_blocking(flags)) {
                ret = dma_alloc_from_pool(size, &page, flags);
                if (!ret)
                        return NULL;
@@ -214,11 +216,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
        /* remove any dirty cache lines on the kernel alias */
        arch_dma_prep_coherent(page, size);
 
-       if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
-               ret = page; /* opaque cookie */
-               goto done;
-       }
-
        /* create a coherent mapping */
        ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
                        arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
@@ -237,10 +234,7 @@ done:
 void arch_dma_free(struct device *dev, size_t size, void *vaddr,
                dma_addr_t dma_handle, unsigned long attrs)
 {
-       if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
-               /* vaddr is a struct page cookie, not a kernel address */
-               __dma_direct_free_pages(dev, size, vaddr);
-       } else if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
+       if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
                phys_addr_t phys = dma_to_phys(dev, dma_handle);
                struct page *page = pfn_to_page(__phys_to_pfn(phys));
 
index 5257f74fccf3cf97951bfb150700424ea3c129dc..9fc31292cfa1d0f458116f446e49926e36ed4a6e 100644 (file)
@@ -327,21 +327,45 @@ EXPORT_SYMBOL(gen_pool_alloc_algo_owner);
  * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
  * @pool: pool to allocate from
  * @size: number of bytes to allocate from the pool
- * @dma: dma-view physical address return value.  Use NULL if unneeded.
+ * @dma: dma-view physical address return value.  Use %NULL if unneeded.
  *
  * Allocate the requested number of bytes from the specified pool.
  * Uses the pool allocation function (with first-fit algorithm by default).
  * Can not be used in NMI handler on architectures without
  * NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated memory, or %NULL on failure
  */
 void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
+{
+       return gen_pool_dma_alloc_algo(pool, size, dma, pool->algo, pool->data);
+}
+EXPORT_SYMBOL(gen_pool_dma_alloc);
+
+/**
+ * gen_pool_dma_alloc_algo - allocate special memory from the pool for DMA
+ * usage with the given pool algorithm
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: DMA-view physical address return value. Use %NULL if unneeded.
+ * @algo: algorithm passed from caller
+ * @data: data passed to algorithm
+ *
+ * Allocate the requested number of bytes from the specified pool. Uses the
+ * given pool allocation function. Can not be used in NMI handler on
+ * architectures without NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated memory, or %NULL on failure
+ */
+void *gen_pool_dma_alloc_algo(struct gen_pool *pool, size_t size,
+               dma_addr_t *dma, genpool_algo_t algo, void *data)
 {
        unsigned long vaddr;
 
        if (!pool)
                return NULL;
 
-       vaddr = gen_pool_alloc(pool, size);
+       vaddr = gen_pool_alloc_algo(pool, size, algo, data);
        if (!vaddr)
                return NULL;
 
@@ -350,7 +374,102 @@ void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
 
        return (void *)vaddr;
 }
-EXPORT_SYMBOL(gen_pool_dma_alloc);
+EXPORT_SYMBOL(gen_pool_dma_alloc_algo);
+
+/**
+ * gen_pool_dma_alloc_align - allocate special memory from the pool for DMA
+ * usage with the given alignment
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: DMA-view physical address return value. Use %NULL if unneeded.
+ * @align: alignment in bytes for starting address
+ *
+ * Allocate the requested number of bytes from the specified pool, with the given
+ * alignment restriction. Can not be used in NMI handler on architectures
+ * without NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated memory, or %NULL on failure
+ */
+void *gen_pool_dma_alloc_align(struct gen_pool *pool, size_t size,
+               dma_addr_t *dma, int align)
+{
+       struct genpool_data_align data = { .align = align };
+
+       return gen_pool_dma_alloc_algo(pool, size, dma,
+                       gen_pool_first_fit_align, &data);
+}
+EXPORT_SYMBOL(gen_pool_dma_alloc_align);
+
+/**
+ * gen_pool_dma_zalloc - allocate special zeroed memory from the pool for
+ * DMA usage
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: dma-view physical address return value.  Use %NULL if unneeded.
+ *
+ * Allocate the requested number of zeroed bytes from the specified pool.
+ * Uses the pool allocation function (with first-fit algorithm by default).
+ * Can not be used in NMI handler on architectures without
+ * NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated zeroed memory, or %NULL on failure
+ */
+void *gen_pool_dma_zalloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
+{
+       return gen_pool_dma_zalloc_algo(pool, size, dma, pool->algo, pool->data);
+}
+EXPORT_SYMBOL(gen_pool_dma_zalloc);
+
+/**
+ * gen_pool_dma_zalloc_algo - allocate special zeroed memory from the pool for
+ * DMA usage with the given pool algorithm
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: DMA-view physical address return value. Use %NULL if unneeded.
+ * @algo: algorithm passed from caller
+ * @data: data passed to algorithm
+ *
+ * Allocate the requested number of zeroed bytes from the specified pool. Uses
+ * the given pool allocation function. Can not be used in NMI handler on
+ * architectures without NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated zeroed memory, or %NULL on failure
+ */
+void *gen_pool_dma_zalloc_algo(struct gen_pool *pool, size_t size,
+               dma_addr_t *dma, genpool_algo_t algo, void *data)
+{
+       void *vaddr = gen_pool_dma_alloc_algo(pool, size, dma, algo, data);
+
+       if (vaddr)
+               memset(vaddr, 0, size);
+
+       return vaddr;
+}
+EXPORT_SYMBOL(gen_pool_dma_zalloc_algo);
+
+/**
+ * gen_pool_dma_zalloc_align - allocate special zeroed memory from the pool for
+ * DMA usage with the given alignment
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: DMA-view physical address return value. Use %NULL if unneeded.
+ * @align: alignment in bytes for starting address
+ *
+ * Allocate the requested number of zeroed bytes from the specified pool,
+ * with the given alignment restriction. Can not be used in NMI handler on
+ * architectures without NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated zeroed memory, or %NULL on failure
+ */
+void *gen_pool_dma_zalloc_align(struct gen_pool *pool, size_t size,
+               dma_addr_t *dma, int align)
+{
+       struct genpool_data_align data = { .align = align };
+
+       return gen_pool_dma_zalloc_algo(pool, size, dma,
+                       gen_pool_first_fit_align, &data);
+}
+EXPORT_SYMBOL(gen_pool_dma_zalloc_align);
 
 /**
  * gen_pool_free - free allocated special memory back to the pool