drm/imagination: Use cached memory with dma_coherent
author: Matt Coster <matt.coster@imgtec.com>
Thu, 10 Apr 2025 09:55:14 +0000 (10:55 +0100)
committer: Matt Coster <matt.coster@imgtec.com>
Tue, 15 Apr 2025 11:21:52 +0000 (12:21 +0100)
The TI k3-j721s2 platform does not allow us to use uncached memory
(which is what the driver currently does) without disabling cache snooping
on the AXI ACE-Lite interface, which would be too much of a performance
hit.

Given the platform is dma-coherent, we can simply force all
device-accessible memory allocations through the CPU cache. In fact, this
can be done whenever the dma_coherent attribute is present.

Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Link: https://lore.kernel.org/r/20250410-sets-bxs-4-64-patch-v1-v6-15-eda620c5865f@imgtec.com
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
drivers/gpu/drm/imagination/pvr_gem.c
drivers/gpu/drm/imagination/pvr_gem.h
drivers/gpu/drm/imagination/pvr_mmu.c

index f692a41875507232ee6e844ccb1f980425f2e9b7..a66cf082af2445044beb286c2d5350849512adb1 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/log2.h>
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
+#include <linux/property.h>
 #include <linux/refcount.h>
 #include <linux/scatterlist.h>
 
@@ -334,6 +335,7 @@ struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t
 struct pvr_gem_object *
 pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags)
 {
+       struct drm_device *drm_dev = from_pvr_device(pvr_dev);
        struct drm_gem_shmem_object *shmem_obj;
        struct pvr_gem_object *pvr_obj;
        struct sg_table *sgt;
@@ -343,7 +345,10 @@ pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags)
        if (size == 0 || !pvr_gem_object_flags_validate(flags))
                return ERR_PTR(-EINVAL);
 
-       shmem_obj = drm_gem_shmem_create(from_pvr_device(pvr_dev), size);
+       if (device_get_dma_attr(drm_dev->dev) == DEV_DMA_COHERENT)
+               flags |= PVR_BO_CPU_CACHED;
+
+       shmem_obj = drm_gem_shmem_create(drm_dev, size);
        if (IS_ERR(shmem_obj))
                return ERR_CAST(shmem_obj);
 
@@ -358,8 +363,7 @@ pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags)
                goto err_shmem_object_free;
        }
 
-       dma_sync_sgtable_for_device(shmem_obj->base.dev->dev, sgt,
-                                   DMA_BIDIRECTIONAL);
+       dma_sync_sgtable_for_device(drm_dev->dev, sgt, DMA_BIDIRECTIONAL);
 
        /*
         * Do this last because pvr_gem_object_zero() requires a fully
index e0e5ea509a2e88a437b8d241ea13c7bab2220f56..c99f30cc62088c030bd8a806df79b738b62a968f 100644 (file)
@@ -44,8 +44,10 @@ struct pvr_file;
  * Bits not defined anywhere are "undefined".
  *
  * CPU mapping options
- *    :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set this
- *       flag to override this behaviour and map the object cached.
+ *    :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set
+ *       this flag to override this behaviour and map the object cached. If the dma_coherent
+ *       property is present in devicetree, all allocations will be mapped as if this flag was set.
+ *       This does not require any additional consideration at allocation time.
  *
  * Firmware options
  *    :PVR_BO_FW_NO_CLEAR_ON_RESET: By default, all FW objects are cleared and reinitialised on hard
index 4fe70610ed94cf707e631f8148af081a94f97327..450d476d183f0173d0ef03f0d8897fbeb04831a2 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/kmemleak.h>
 #include <linux/minmax.h>
+#include <linux/property.h>
 #include <linux/sizes.h>
 
 #define PVR_SHIFT_FROM_SIZE(size_) (__builtin_ctzll(size_))
@@ -259,6 +260,7 @@ pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
        struct device *dev = from_pvr_device(pvr_dev)->dev;
 
        struct page *raw_page;
+       pgprot_t prot;
        int err;
 
        dma_addr_t dma_addr;
@@ -268,7 +270,11 @@ pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
        if (!raw_page)
                return -ENOMEM;
 
-       host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+       prot = PAGE_KERNEL;
+       if (device_get_dma_attr(dev) != DEV_DMA_COHERENT)
+               prot = pgprot_writecombine(prot);
+
+       host_ptr = vmap(&raw_page, 1, VM_MAP, prot);
        if (!host_ptr) {
                err = -ENOMEM;
                goto err_free_page;