drm/imagination: Add GEM and VM related code
authorDonald Robson <donald.robson@imgtec.com>
Wed, 22 Nov 2023 16:34:32 +0000 (16:34 +0000)
committerMaxime Ripard <mripard@kernel.org>
Thu, 23 Nov 2023 08:01:46 +0000 (09:01 +0100)
Add a GEM implementation based on drm_gem_shmem, and support code for the
PowerVR GPU MMU. The GPU VA manager is used for address space management.

Changes since v8:
- Updated for changes to drm_gpuvm
- Switched to dma_resv locking for vm ops
- Removed linked lists for collecting BOs in vm_context and for freeing
  after ops. This is now done internally in drm_gpuvm
- Corrected license identifiers

Changes since v7:
- kernel-doc fixes
- Remove prefixes from DRM_PVR_BO_* flags
- CREATE_BO ioctl now returns an error if provided size isn't page aligned
- Optimised MMU flushes

Changes since v6:
- Don't initialise kernel_vm_ctx when using MIPS firmware processor
- Rename drm_gpuva_manager uses to drm_gpuvm
- Sync GEM object to device on creation

Changes since v5:
- Use WRITE_ONCE() when writing to page tables
- Add memory barriers to page table insertion
- Fixed double backing page alloc on page table objects
- Fix BO mask checks in DRM_IOCTL_PVR_CREATE_BO handler
- Document use of pvr_page_table_*_idx when preallocing page table objs
- Remove pvr_vm_gpuva_mapping_init()
- Remove NULL check for unmap op in remap function
- Protect gem object with mutex during drm_gpuva_link/unlink
- Defer free or release of page table pages until after TLB flush
- Use drm_gpuva_op_remap_get_unmap_range() helper

Changes since v4:
- Correct sync function in vmap/vunmap function documentation
- Update for upstream GPU VA manager
- Fix missing frees when unmapping drm_gpuva objects
- Always zero GEM BOs on creation

Changes since v3:
- Split MMU and VM code
- Register page table allocations with kmemleak
- Use drm_dev_{enter,exit}

Changes since v2:
- Use GPU VA manager
- Use drm_gem_shmem

Co-developed-by: Matt Coster <matt.coster@imgtec.com>
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Co-developed-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Donald Robson <donald.robson@imgtec.com>
Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Link: https://lore.kernel.org/r/3c96dd170efe759b73897e3675d7310a7c4b06d0.1700668843.git.donald.robson@imgtec.com
Signed-off-by: Maxime Ripard <mripard@kernel.org>
drivers/gpu/drm/imagination/Kconfig
drivers/gpu/drm/imagination/Makefile
drivers/gpu/drm/imagination/pvr_device.c
drivers/gpu/drm/imagination/pvr_device.h
drivers/gpu/drm/imagination/pvr_drv.c
drivers/gpu/drm/imagination/pvr_gem.c [new file with mode: 0644]
drivers/gpu/drm/imagination/pvr_gem.h [new file with mode: 0644]
drivers/gpu/drm/imagination/pvr_mmu.c [new file with mode: 0644]
drivers/gpu/drm/imagination/pvr_mmu.h [new file with mode: 0644]
drivers/gpu/drm/imagination/pvr_vm.c [new file with mode: 0644]
drivers/gpu/drm/imagination/pvr_vm.h [new file with mode: 0644]

index 03d0a5031f5ccb1f0d13bfbdea15f9d9204c6f73..6f82edf89144ccf1f6e2207d3a1df67460984d27 100644 (file)
@@ -7,6 +7,7 @@ config DRM_POWERVR
        depends on DRM
        select DRM_GEM_SHMEM_HELPER
        select DRM_SCHED
+       select DRM_GPUVM
        select FW_LOADER
        help
          Choose this option if you have a system that has an Imagination
index 0b863e9f51b86dc3dc5f745495d8f6826527eb8b..678c3dbb43260f90f6cd396b1ba186354b04e17a 100644 (file)
@@ -7,6 +7,9 @@ powervr-y := \
        pvr_device.o \
        pvr_device_info.o \
        pvr_drv.o \
-       pvr_fw.o
+       pvr_fw.o \
+       pvr_gem.o \
+       pvr_mmu.o \
+       pvr_vm.o
 
 obj-$(CONFIG_DRM_POWERVR) += powervr.o
index 05e382cacb277ec37caf4689dc4b890c130d363a..201ae780494f865807ebe5fe4d731b63b9fcd4c0 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "pvr_fw.h"
 #include "pvr_rogue_cr_defs.h"
+#include "pvr_vm.h"
 
 #include <drm/drm_print.h>
 
@@ -312,7 +313,30 @@ pvr_device_gpu_init(struct pvr_device *pvr_dev)
        else
                return -EINVAL;
 
-       return pvr_set_dma_info(pvr_dev);
+       err = pvr_set_dma_info(pvr_dev);
+       if (err)
+               return err;
+
+       if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) {
+               pvr_dev->kernel_vm_ctx = pvr_vm_create_context(pvr_dev, false);
+               if (IS_ERR(pvr_dev->kernel_vm_ctx))
+                       return PTR_ERR(pvr_dev->kernel_vm_ctx);
+       }
+
+       return 0;
+}
+
+/**
+ * pvr_device_gpu_fini() - GPU-specific deinitialization for a PowerVR device
+ * @pvr_dev: Target PowerVR device.
+ */
+static void
+pvr_device_gpu_fini(struct pvr_device *pvr_dev)
+{
+       if (pvr_dev->fw_dev.processor_type != PVR_FW_PROCESSOR_TYPE_MIPS) {
+               WARN_ON(!pvr_vm_context_put(pvr_dev->kernel_vm_ctx));
+               pvr_dev->kernel_vm_ctx = NULL;
+       }
 }
 
 /**
@@ -364,6 +388,7 @@ pvr_device_fini(struct pvr_device *pvr_dev)
         * Deinitialization stages are performed in reverse order compared to
         * the initialization stages in pvr_device_init().
         */
+       pvr_device_gpu_fini(pvr_dev);
 }
 
 bool
index 36fe67bed27ff05523152665212cbf21cfb34cc6..bfc853ffd58fbcd1059c8512a85c2b8b3693820a 100644 (file)
@@ -123,8 +123,24 @@ struct pvr_device {
         */
        struct clk *mem_clk;
 
+       /**
+        * @kernel_vm_ctx: Virtual memory context used for kernel mappings.
+        *
+        * This is used for mappings in the firmware address region when a META firmware processor
+        * is in use.
+        *
+        * When a MIPS firmware processor is in use, this will be %NULL.
+        */
+       struct pvr_vm_context *kernel_vm_ctx;
+
        /** @fw_dev: Firmware related data. */
        struct pvr_fw_device fw_dev;
+
+       /**
+        * @mmu_flush_cache_flags: Records which MMU caches require flushing
+        * before submitting the next job.
+        */
+       atomic_t mmu_flush_cache_flags;
 };
 
 /**
@@ -145,6 +161,14 @@ struct pvr_file {
         *           to_pvr_device().
         */
        struct pvr_device *pvr_dev;
+
+       /**
+        * @vm_ctx_handles: Array of VM contexts belonging to this file. Array
+        * members are of type "struct pvr_vm_context *".
+        *
+        * This array is used to allocate handles returned to userspace.
+        */
+       struct xarray vm_ctx_handles;
 };
 
 /**
index 1e4408f33e94e423ed75d33c63b5936f237e4832..6d3bc886e1c3bcf298624b43289afcd77c8820f9 100644 (file)
@@ -3,9 +3,12 @@
 
 #include "pvr_device.h"
 #include "pvr_drv.h"
+#include "pvr_gem.h"
+#include "pvr_mmu.h"
 #include "pvr_rogue_defs.h"
 #include "pvr_rogue_fwif_client.h"
 #include "pvr_rogue_fwif_shared.h"
+#include "pvr_vm.h"
 
 #include <uapi/drm/pvr_drm.h>
 
@@ -60,7 +63,72 @@ static int
 pvr_ioctl_create_bo(struct drm_device *drm_dev, void *raw_args,
                    struct drm_file *file)
 {
-       return -ENOTTY;
+       struct drm_pvr_ioctl_create_bo_args *args = raw_args;
+       struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+       struct pvr_file *pvr_file = to_pvr_file(file);
+
+       struct pvr_gem_object *pvr_obj;
+       size_t sanitized_size;
+
+       int idx;
+       int err;
+
+       if (!drm_dev_enter(drm_dev, &idx))
+               return -EIO;
+
+       /* All padding fields must be zeroed. */
+       if (args->_padding_c != 0) {
+               err = -EINVAL;
+               goto err_drm_dev_exit;
+       }
+
+       /*
+        * On 64-bit platforms (our primary target), size_t is a u64. However,
+        * on other architectures we have to check for overflow when casting
+        * down to size_t from u64.
+        *
+        * We also disallow zero-sized allocations, and reserved (kernel-only)
+        * flags.
+        */
+       if (args->size > SIZE_MAX || args->size == 0 || args->flags &
+           ~DRM_PVR_BO_FLAGS_MASK || args->size & (PVR_DEVICE_PAGE_SIZE - 1)) {
+               err = -EINVAL;
+               goto err_drm_dev_exit;
+       }
+
+       sanitized_size = (size_t)args->size;
+
+       /*
+        * Create a buffer object and transfer ownership to a userspace-
+        * accessible handle.
+        */
+       pvr_obj = pvr_gem_object_create(pvr_dev, sanitized_size, args->flags);
+       if (IS_ERR(pvr_obj)) {
+               err = PTR_ERR(pvr_obj);
+               goto err_drm_dev_exit;
+       }
+
+       /* This function will not modify &args->handle unless it succeeds. */
+       err = pvr_gem_object_into_handle(pvr_obj, pvr_file, &args->handle);
+       if (err)
+               goto err_destroy_obj;
+
+       drm_dev_exit(idx);
+
+       return 0;
+
+err_destroy_obj:
+       /*
+        * GEM objects are refcounted, so there is no explicit destructor
+        * function. Instead, we release the singular reference we currently
+        * hold on the object and let GEM take care of the rest.
+        */
+       pvr_gem_object_put(pvr_obj);
+
+err_drm_dev_exit:
+       drm_dev_exit(idx);
+
+       return err;
 }
 
 /**
@@ -87,7 +155,61 @@ static int
 pvr_ioctl_get_bo_mmap_offset(struct drm_device *drm_dev, void *raw_args,
                             struct drm_file *file)
 {
-       return -ENOTTY;
+       struct drm_pvr_ioctl_get_bo_mmap_offset_args *args = raw_args;
+       struct pvr_file *pvr_file = to_pvr_file(file);
+       struct pvr_gem_object *pvr_obj;
+       struct drm_gem_object *gem_obj;
+       int idx;
+       int ret;
+
+       if (!drm_dev_enter(drm_dev, &idx))
+               return -EIO;
+
+       /* All padding fields must be zeroed. */
+       if (args->_padding_4 != 0) {
+               ret = -EINVAL;
+               goto err_drm_dev_exit;
+       }
+
+       /*
+        * Obtain a kernel reference to the buffer object. This reference is
+        * counted and must be manually dropped before returning. If a buffer
+        * object cannot be found for the specified handle, return -%ENOENT (No
+        * such file or directory).
+        */
+       pvr_obj = pvr_gem_object_from_handle(pvr_file, args->handle);
+       if (!pvr_obj) {
+               ret = -ENOENT;
+               goto err_drm_dev_exit;
+       }
+
+       gem_obj = gem_from_pvr_gem(pvr_obj);
+
+       /*
+        * Allocate a fake offset which can be used in userspace calls to mmap
+        * on the DRM device file. If this fails, return the error code. This
+        * operation is idempotent.
+        */
+       ret = drm_gem_create_mmap_offset(gem_obj);
+       if (ret != 0) {
+               /* Drop our reference to the buffer object. */
+               drm_gem_object_put(gem_obj);
+               goto err_drm_dev_exit;
+       }
+
+       /*
+        * Read out the fake offset allocated by the earlier call to
+        * drm_gem_create_mmap_offset.
+        */
+       args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+       /* Drop our reference to the buffer object. */
+       pvr_gem_object_put(pvr_obj);
+
+err_drm_dev_exit:
+       drm_dev_exit(idx);
+
+       return ret;
 }
 
 static __always_inline u64
@@ -516,10 +638,12 @@ pvr_ioctl_dev_query(struct drm_device *drm_dev, void *raw_args,
                break;
 
        case DRM_PVR_DEV_QUERY_HEAP_INFO_GET:
-               return -EINVAL;
+               ret = pvr_heap_info_get(pvr_dev, args);
+               break;
 
        case DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET:
-               return -EINVAL;
+               ret = pvr_static_data_areas_get(pvr_dev, args);
+               break;
        }
 
        drm_dev_exit(idx);
@@ -666,7 +790,46 @@ static int
 pvr_ioctl_create_vm_context(struct drm_device *drm_dev, void *raw_args,
                            struct drm_file *file)
 {
-       return -ENOTTY;
+       struct drm_pvr_ioctl_create_vm_context_args *args = raw_args;
+       struct pvr_file *pvr_file = to_pvr_file(file);
+       struct pvr_vm_context *vm_ctx;
+       int idx;
+       int err;
+
+       if (!drm_dev_enter(drm_dev, &idx))
+               return -EIO;
+
+       if (args->_padding_4) {
+               err = -EINVAL;
+               goto err_drm_dev_exit;
+       }
+
+       vm_ctx = pvr_vm_create_context(pvr_file->pvr_dev, true);
+       if (IS_ERR(vm_ctx)) {
+               err = PTR_ERR(vm_ctx);
+               goto err_drm_dev_exit;
+       }
+
+       /* Allocate object handle for userspace. */
+       err = xa_alloc(&pvr_file->vm_ctx_handles,
+                      &args->handle,
+                      vm_ctx,
+                      xa_limit_32b,
+                      GFP_KERNEL);
+       if (err < 0)
+               goto err_cleanup;
+
+       drm_dev_exit(idx);
+
+       return 0;
+
+err_cleanup:
+       pvr_vm_context_put(vm_ctx);
+
+err_drm_dev_exit:
+       drm_dev_exit(idx);
+
+       return err;
 }
 
 /**
@@ -686,7 +849,19 @@ static int
 pvr_ioctl_destroy_vm_context(struct drm_device *drm_dev, void *raw_args,
                             struct drm_file *file)
 {
-       return -ENOTTY;
+       struct drm_pvr_ioctl_destroy_vm_context_args *args = raw_args;
+       struct pvr_file *pvr_file = to_pvr_file(file);
+       struct pvr_vm_context *vm_ctx;
+
+       if (args->_padding_4)
+               return -EINVAL;
+
+       vm_ctx = xa_erase(&pvr_file->vm_ctx_handles, args->handle);
+       if (!vm_ctx)
+               return -EINVAL;
+
+       pvr_vm_context_put(vm_ctx);
+       return 0;
 }
 
 /**
@@ -716,7 +891,79 @@ static int
 pvr_ioctl_vm_map(struct drm_device *drm_dev, void *raw_args,
                 struct drm_file *file)
 {
-       return -ENOTTY;
+       struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
+       struct drm_pvr_ioctl_vm_map_args *args = raw_args;
+       struct pvr_file *pvr_file = to_pvr_file(file);
+       struct pvr_vm_context *vm_ctx;
+
+       struct pvr_gem_object *pvr_obj;
+       size_t pvr_obj_size;
+
+       u64 offset_plus_size;
+       int idx;
+       int err;
+
+       if (!drm_dev_enter(drm_dev, &idx))
+               return -EIO;
+
+       /* Initial validation of args. */
+       if (args->_padding_14) {
+               err = -EINVAL;
+               goto err_drm_dev_exit;
+       }
+
+       if (args->flags != 0 ||
+           check_add_overflow(args->offset, args->size, &offset_plus_size) ||
+           !pvr_find_heap_containing(pvr_dev, args->device_addr, args->size)) {
+               err = -EINVAL;
+               goto err_drm_dev_exit;
+       }
+
+       vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle);
+       if (!vm_ctx) {
+               err = -EINVAL;
+               goto err_drm_dev_exit;
+       }
+
+       pvr_obj = pvr_gem_object_from_handle(pvr_file, args->handle);
+       if (!pvr_obj) {
+               err = -ENOENT;
+               goto err_put_vm_context;
+       }
+
+       pvr_obj_size = pvr_gem_object_size(pvr_obj);
+
+       /*
+        * Validate offset and size args. The alignment of these will be
+        * checked when mapping; for now just check that they're within valid
+        * bounds
+        */
+       if (args->offset >= pvr_obj_size || offset_plus_size > pvr_obj_size) {
+               err = -EINVAL;
+               goto err_put_pvr_object;
+       }
+
+       err = pvr_vm_map(vm_ctx, pvr_obj, args->offset,
+                        args->device_addr, args->size);
+       if (err)
+               goto err_put_pvr_object;
+
+       /*
+        * In order to set up the mapping, we needed a reference to &pvr_obj.
+        * However, pvr_vm_map() obtains and stores its own reference, so we
+        * must release ours before returning.
+        */
+
+err_put_pvr_object:
+       pvr_gem_object_put(pvr_obj);
+
+err_put_vm_context:
+       pvr_vm_context_put(vm_ctx);
+
+err_drm_dev_exit:
+       drm_dev_exit(idx);
+
+       return err;
 }
 
 /**
@@ -739,7 +986,24 @@ static int
 pvr_ioctl_vm_unmap(struct drm_device *drm_dev, void *raw_args,
                   struct drm_file *file)
 {
-       return -ENOTTY;
+       struct drm_pvr_ioctl_vm_unmap_args *args = raw_args;
+       struct pvr_file *pvr_file = to_pvr_file(file);
+       struct pvr_vm_context *vm_ctx;
+       int err;
+
+       /* Initial validation of args. */
+       if (args->_padding_4)
+               return -EINVAL;
+
+       vm_ctx = pvr_vm_context_lookup(pvr_file, args->vm_context_handle);
+       if (!vm_ctx)
+               return -EINVAL;
+
+       err = pvr_vm_unmap(vm_ctx, args->device_addr, args->size);
+
+       pvr_vm_context_put(vm_ctx);
+
+       return err;
 }
 
 /*
@@ -930,6 +1194,8 @@ pvr_drm_driver_open(struct drm_device *drm_dev, struct drm_file *file)
         */
        pvr_file->pvr_dev = pvr_dev;
 
+       xa_init_flags(&pvr_file->vm_ctx_handles, XA_FLAGS_ALLOC1);
+
        /*
         * Store reference to powervr-specific file private data in DRM file
         * private data.
@@ -955,6 +1221,9 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
 {
        struct pvr_file *pvr_file = to_pvr_file(file);
 
+       /* Drop references on any remaining objects. */
+       pvr_destroy_vm_contexts_for_file(pvr_file);
+
        kfree(pvr_file);
        file->driver_priv = NULL;
 }
@@ -962,7 +1231,7 @@ pvr_drm_driver_postclose(__always_unused struct drm_device *drm_dev,
 DEFINE_DRM_GEM_FOPS(pvr_drm_driver_fops);
 
 static struct drm_driver pvr_drm_driver = {
-       .driver_features = DRIVER_RENDER,
+       .driver_features = DRIVER_GEM | DRIVER_GEM_GPUVA | DRIVER_RENDER,
        .open = pvr_drm_driver_open,
        .postclose = pvr_drm_driver_postclose,
        .ioctls = pvr_drm_driver_ioctls,
@@ -976,6 +1245,8 @@ static struct drm_driver pvr_drm_driver = {
        .minor = PVR_DRIVER_MINOR,
        .patchlevel = PVR_DRIVER_PATCHLEVEL,
 
+       .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
+       .gem_create_object = pvr_gem_create_object,
 };
 
 static int
diff --git a/drivers/gpu/drm/imagination/pvr_gem.c b/drivers/gpu/drm/imagination/pvr_gem.c
new file mode 100644 (file)
index 0000000..6a8c81f
--- /dev/null
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_device.h"
+#include "pvr_gem.h"
+#include "pvr_vm.h"
+
+#include <drm/drm_gem.h>
+#include <drm/drm_prime.h>
+
+#include <linux/compiler.h>
+#include <linux/compiler_attributes.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/iosys-map.h>
+#include <linux/log2.h>
+#include <linux/mutex.h>
+#include <linux/pagemap.h>
+#include <linux/refcount.h>
+#include <linux/scatterlist.h>
+
+static void pvr_gem_object_free(struct drm_gem_object *obj)
+{
+       drm_gem_shmem_object_free(obj);
+}
+
+static int pvr_gem_mmap(struct drm_gem_object *gem_obj, struct vm_area_struct *vma)
+{
+       struct pvr_gem_object *pvr_obj = gem_to_pvr_gem(gem_obj);
+       struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+
+       if (!(pvr_obj->flags & DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS))
+               return -EINVAL;
+
+       return drm_gem_shmem_mmap(shmem_obj, vma);
+}
+
+static const struct drm_gem_object_funcs pvr_gem_object_funcs = {
+       .free = pvr_gem_object_free,
+       .print_info = drm_gem_shmem_object_print_info,
+       .pin = drm_gem_shmem_object_pin,
+       .unpin = drm_gem_shmem_object_unpin,
+       .get_sg_table = drm_gem_shmem_object_get_sg_table,
+       .vmap = drm_gem_shmem_object_vmap,
+       .vunmap = drm_gem_shmem_object_vunmap,
+       .mmap = pvr_gem_mmap,
+       .vm_ops = &drm_gem_shmem_vm_ops,
+};
+
+/**
+ * pvr_gem_object_flags_validate() - Verify that a collection of PowerVR GEM
+ * mapping and/or creation flags form a valid combination.
+ * @flags: PowerVR GEM mapping/creation flags to validate.
+ *
+ * This function explicitly allows kernel-only flags. All ioctl entrypoints
+ * should do their own validation as well as relying on this function.
+ *
+ * Return:
+ *  * %true if @flags contains valid mapping and/or creation flags, or
+ *  * %false otherwise.
+ */
+static bool
+pvr_gem_object_flags_validate(u64 flags)
+{
+       static const u64 invalid_combinations[] = {
+               /*
+                * Memory flagged as PM/FW-protected cannot be mapped to
+                * userspace. To make this explicit, we require that the two
+                * flags allowing each of these respective features are never
+                * specified together.
+                */
+               (DRM_PVR_BO_PM_FW_PROTECT |
+                DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS),
+       };
+
+       int i;
+
+       /*
+        * Check for bits set in undefined regions. Reserved regions refer to
+        * options that can only be set by the kernel. These are explicitly
+        * allowed in most cases, and must be checked specifically in IOCTL
+        * callback code.
+        */
+       if ((flags & PVR_BO_UNDEFINED_MASK) != 0)
+               return false;
+
+       /*
+        * Check for all combinations of flags marked as invalid in the array
+        * above.
+        */
+       for (i = 0; i < ARRAY_SIZE(invalid_combinations); ++i) {
+               u64 combo = invalid_combinations[i];
+
+               if ((flags & combo) == combo)
+                       return false;
+       }
+
+       return true;
+}
+
+/**
+ * pvr_gem_object_into_handle() - Convert a reference to an object into a
+ * userspace-accessible handle.
+ * @pvr_obj: [IN] Target PowerVR-specific object.
+ * @pvr_file: [IN] File to associate the handle with.
+ * @handle: [OUT] Pointer to store the created handle in. Remains unmodified if
+ * an error is encountered.
+ *
+ * If an error is encountered, ownership of @pvr_obj will not have been
+ * transferred. If this function succeeds, however, further use of @pvr_obj is
+ * considered undefined behaviour unless another reference to it is explicitly
+ * held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while attempting to allocate a handle on @pvr_file.
+ */
+int
+pvr_gem_object_into_handle(struct pvr_gem_object *pvr_obj,
+                          struct pvr_file *pvr_file, u32 *handle)
+{
+       struct drm_gem_object *gem_obj = gem_from_pvr_gem(pvr_obj);
+       struct drm_file *file = from_pvr_file(pvr_file);
+
+       u32 new_handle;
+       int err;
+
+       err = drm_gem_handle_create(file, gem_obj, &new_handle);
+       if (err)
+               return err;
+
+       /*
+        * Release our reference to @pvr_obj, effectively transferring
+        * ownership to the handle.
+        */
+       pvr_gem_object_put(pvr_obj);
+
+       /*
+        * Do not store the new handle in @handle until no more errors can
+        * occur.
+        */
+       *handle = new_handle;
+
+       return 0;
+}
+
+/**
+ * pvr_gem_object_from_handle() - Obtain a reference to an object from a
+ * userspace handle.
+ * @pvr_file: PowerVR-specific file to which @handle is associated.
+ * @handle: Userspace handle referencing the target object.
+ *
+ * On return, @handle always maintains its reference to the requested object
+ * (if it had one in the first place). If this function succeeds, the returned
+ * object will hold an additional reference. When the caller is finished with
+ * the returned object, they should call pvr_gem_object_put() on it to release
+ * this reference.
+ *
+ * Return:
+ *  * A pointer to the requested PowerVR-specific object on success, or
+ *  * %NULL otherwise.
+ */
+struct pvr_gem_object *
+pvr_gem_object_from_handle(struct pvr_file *pvr_file, u32 handle)
+{
+       struct drm_file *file = from_pvr_file(pvr_file);
+       struct drm_gem_object *gem_obj;
+
+       gem_obj = drm_gem_object_lookup(file, handle);
+       if (!gem_obj)
+               return NULL;
+
+       return gem_to_pvr_gem(gem_obj);
+}
+
+/**
+ * pvr_gem_object_vmap() - Map a PowerVR GEM object into CPU virtual address
+ * space.
+ * @pvr_obj: Target PowerVR GEM object.
+ *
+ * Once the caller is finished with the CPU mapping, they must call
+ * pvr_gem_object_vunmap() on @pvr_obj.
+ *
+ * If @pvr_obj is CPU-cached, dma_sync_sgtable_for_cpu() is called to make
+ * sure the CPU mapping is consistent.
+ *
+ * Return:
+ *  * A pointer to the CPU mapping on success,
+ *  * -%ENOMEM if the mapping fails, or
+ *  * Any error encountered while attempting to acquire a reference to the
+ *    backing pages for @pvr_obj.
+ */
+void *
+pvr_gem_object_vmap(struct pvr_gem_object *pvr_obj)
+{
+       struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+       struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj);
+       struct iosys_map map;
+       int err;
+
+       dma_resv_lock(obj->resv, NULL);
+
+       err = drm_gem_shmem_vmap(shmem_obj, &map);
+       if (err)
+               goto err_unlock;
+
+       if (pvr_obj->flags & PVR_BO_CPU_CACHED) {
+               struct device *dev = shmem_obj->base.dev->dev;
+
+               /* If shmem_obj->sgt is NULL, that means the buffer hasn't been mapped
+                * in GPU space yet.
+                */
+               if (shmem_obj->sgt)
+                       dma_sync_sgtable_for_cpu(dev, shmem_obj->sgt, DMA_BIDIRECTIONAL);
+       }
+
+       dma_resv_unlock(obj->resv);
+
+       return map.vaddr;
+
+err_unlock:
+       dma_resv_unlock(obj->resv);
+
+       return ERR_PTR(err);
+}
+
+/**
+ * pvr_gem_object_vunmap() - Unmap a PowerVR memory object from CPU virtual
+ * address space.
+ * @pvr_obj: Target PowerVR GEM object.
+ *
+ * If @pvr_obj is CPU-cached, dma_sync_sgtable_for_device() is called to make
+ * sure the GPU mapping is consistent.
+ */
+void
+pvr_gem_object_vunmap(struct pvr_gem_object *pvr_obj)
+{
+       struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+       struct iosys_map map = IOSYS_MAP_INIT_VADDR(shmem_obj->vaddr);
+       struct drm_gem_object *obj = gem_from_pvr_gem(pvr_obj);
+
+       if (WARN_ON(!map.vaddr))
+               return;
+
+       dma_resv_lock(obj->resv, NULL);
+
+       if (pvr_obj->flags & PVR_BO_CPU_CACHED) {
+               struct device *dev = shmem_obj->base.dev->dev;
+
+               /* If shmem_obj->sgt is NULL, that means the buffer hasn't been mapped
+                * in GPU space yet.
+                */
+               if (shmem_obj->sgt)
+                       dma_sync_sgtable_for_device(dev, shmem_obj->sgt, DMA_BIDIRECTIONAL);
+       }
+
+       drm_gem_shmem_vunmap(shmem_obj, &map);
+
+       dma_resv_unlock(obj->resv);
+}
+
+/**
+ * pvr_gem_object_zero() - Zeroes the physical memory behind an object.
+ * @pvr_obj: Target PowerVR GEM object.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while attempting to map @pvr_obj to the CPU (see
+ *    pvr_gem_object_vmap()).
+ */
+static int
+pvr_gem_object_zero(struct pvr_gem_object *pvr_obj)
+{
+       void *cpu_ptr;
+
+       cpu_ptr = pvr_gem_object_vmap(pvr_obj);
+       if (IS_ERR(cpu_ptr))
+               return PTR_ERR(cpu_ptr);
+
+       memset(cpu_ptr, 0, pvr_gem_object_size(pvr_obj));
+
+       /* Make sure the zero-ing is done before vumap-ing the object. */
+       wmb();
+
+       pvr_gem_object_vunmap(pvr_obj);
+
+       return 0;
+}
+
+/**
+ * pvr_gem_create_object() - Allocate and pre-initializes a pvr_gem_object
+ * @drm_dev: DRM device creating this object.
+ * @size: Size of the object to allocate in bytes.
+ *
+ * Return:
+ *  * The new pre-initialized GEM object on success,
+ *  * -ENOMEM if the allocation failed.
+ */
+struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t size)
+{
+       struct drm_gem_object *gem_obj;
+       struct pvr_gem_object *pvr_obj;
+
+       pvr_obj = kzalloc(sizeof(*pvr_obj), GFP_KERNEL);
+       if (!pvr_obj)
+               return ERR_PTR(-ENOMEM);
+
+       gem_obj = gem_from_pvr_gem(pvr_obj);
+       gem_obj->funcs = &pvr_gem_object_funcs;
+
+       return gem_obj;
+}
+
+/**
+ * pvr_gem_object_create() - Creates a PowerVR-specific buffer object.
+ * @pvr_dev: Target PowerVR device.
+ * @size: Size of the object to allocate in bytes. Must be greater than zero.
+ * Any value which is not an exact multiple of the system page size will be
+ * rounded up to satisfy this condition.
+ * @flags: Options which affect both this operation and future mapping
+ * operations performed on the returned object. Must be a combination of
+ * DRM_PVR_BO_* and/or PVR_BO_* flags.
+ *
+ * The created object may be larger than @size, but can never be smaller. To
+ * get the exact size, call pvr_gem_object_size() on the returned pointer.
+ *
+ * Return:
+ *  * The newly-minted PowerVR-specific buffer object on success,
+ *  * -%EINVAL if @size is zero or @flags is not valid,
+ *  * -%ENOMEM if sufficient physical memory cannot be allocated, or
+ *  * Any other error returned by drm_gem_create_mmap_offset().
+ */
+struct pvr_gem_object *
+pvr_gem_object_create(struct pvr_device *pvr_dev, size_t size, u64 flags)
+{
+       struct drm_gem_shmem_object *shmem_obj;
+       struct pvr_gem_object *pvr_obj;
+       struct sg_table *sgt;
+       int err;
+
+       /* Verify @size and @flags before continuing. */
+       if (size == 0 || !pvr_gem_object_flags_validate(flags))
+               return ERR_PTR(-EINVAL);
+
+       shmem_obj = drm_gem_shmem_create(from_pvr_device(pvr_dev), size);
+       if (IS_ERR(shmem_obj))
+               return ERR_CAST(shmem_obj);
+
+       shmem_obj->pages_mark_dirty_on_put = true;
+       shmem_obj->map_wc = !(flags & PVR_BO_CPU_CACHED);
+       pvr_obj = shmem_gem_to_pvr_gem(shmem_obj);
+       pvr_obj->flags = flags;
+
+       sgt = drm_gem_shmem_get_pages_sgt(shmem_obj);
+       if (IS_ERR(sgt)) {
+               err = PTR_ERR(sgt);
+               goto err_shmem_object_free;
+       }
+
+       dma_sync_sgtable_for_device(shmem_obj->base.dev->dev, sgt,
+                                   DMA_BIDIRECTIONAL);
+
+       /*
+        * Do this last because pvr_gem_object_zero() requires a fully
+        * configured instance of struct pvr_gem_object.
+        */
+       pvr_gem_object_zero(pvr_obj);
+
+       return pvr_obj;
+
+err_shmem_object_free:
+       drm_gem_shmem_free(shmem_obj);
+
+       return ERR_PTR(err);
+}
+
+/**
+ * pvr_gem_get_dma_addr() - Get DMA address for given offset in object
+ * @pvr_obj: Pointer to object to lookup address in.
+ * @offset: Offset within object to lookup address at.
+ * @dma_addr_out: Pointer to location to store DMA address.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * -%EINVAL if object is not currently backed, or if @offset is out of valid
+ *    range for this object.
+ */
+int
+pvr_gem_get_dma_addr(struct pvr_gem_object *pvr_obj, u32 offset,
+                    dma_addr_t *dma_addr_out)
+{
+       struct drm_gem_shmem_object *shmem_obj = shmem_gem_from_pvr_gem(pvr_obj);
+       u32 accumulated_offset = 0;
+       struct scatterlist *sgl;
+       unsigned int sgt_idx;
+
+       WARN_ON(!shmem_obj->sgt);
+       for_each_sgtable_dma_sg(shmem_obj->sgt, sgl, sgt_idx) {
+               u32 new_offset = accumulated_offset + sg_dma_len(sgl);
+
+               if (offset >= accumulated_offset && offset < new_offset) {
+                       *dma_addr_out = sg_dma_address(sgl) +
+                                       (offset - accumulated_offset);
+                       return 0;
+               }
+
+               accumulated_offset = new_offset;
+       }
+
+       return -EINVAL;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_gem.h b/drivers/gpu/drm/imagination/pvr_gem.h
new file mode 100644 (file)
index 0000000..e0e5ea5
--- /dev/null
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_GEM_H
+#define PVR_GEM_H
+
+#include "pvr_rogue_heap_config.h"
+#include "pvr_rogue_meta.h"
+
+#include <uapi/drm/pvr_drm.h>
+
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_mm.h>
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/const.h>
+#include <linux/compiler_attributes.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+#include <linux/scatterlist.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+/* Forward declaration from "pvr_device.h". */
+struct pvr_device;
+struct pvr_file;
+
+/**
+ * DOC: Flags for DRM_IOCTL_PVR_CREATE_BO (kernel-only)
+ *
+ * Kernel-only values allowed in &pvr_gem_object->flags. The majority of options
+ * for this field are specified in the UAPI header "pvr_drm.h" with a
+ * DRM_PVR_BO_ prefix. To distinguish these internal options (which must exist
+ * in ranges marked as "reserved" in the UAPI header), we drop the DRM prefix.
+ * The public options should be used directly, DRM prefix and all.
+ *
+ * To avoid potentially confusing gaps in the UAPI options, these kernel-only
+ * options are specified "in reverse", starting at bit 63.
+ *
+ * We use "reserved" to refer to bits defined here and not exposed in the UAPI.
+ * Bits not defined anywhere are "undefined".
+ *
+ * CPU mapping options
+ *    :PVR_BO_CPU_CACHED: By default, all GEM objects are mapped write-combined on the CPU. Set this
+ *       flag to override this behaviour and map the object cached.
+ *
+ * Firmware options
+ *    :PVR_BO_FW_NO_CLEAR_ON_RESET: By default, all FW objects are cleared and reinitialised on hard
+ *       reset. Set this flag to override this behaviour and preserve buffer contents on reset.
+ */
+#define PVR_BO_CPU_CACHED BIT_ULL(63)
+
+#define PVR_BO_FW_NO_CLEAR_ON_RESET BIT_ULL(62)
+
+#define PVR_BO_KERNEL_FLAGS_MASK (PVR_BO_CPU_CACHED | PVR_BO_FW_NO_CLEAR_ON_RESET)
+
+/* Bits 61..3 are undefined. */
+/* Bits 2..0 are defined in the UAPI. */
+
+/* Other utilities. */
+#define PVR_BO_UNDEFINED_MASK ~(PVR_BO_KERNEL_FLAGS_MASK | DRM_PVR_BO_FLAGS_MASK)
+
+/*
+ * All firmware-mapped memory uses (mostly) the same flags. Specifically,
+ * firmware-mapped memory should be:
+ *  * Read/write on the device,
+ *  * Read/write on the CPU, and
+ *  * Write-combined on the CPU.
+ *
+ * The only variation is in caching on the device.
+ */
+#define PVR_BO_FW_FLAGS_DEVICE_CACHED (ULL(0))
+#define PVR_BO_FW_FLAGS_DEVICE_UNCACHED DRM_PVR_BO_BYPASS_DEVICE_CACHE
+
+/**
+ * struct pvr_gem_object - powervr-specific wrapper for &struct drm_gem_object
+ */
+struct pvr_gem_object {
+       /**
+        * @base: The underlying &struct drm_gem_shmem_object.
+        *
+        * Do not access this member directly, instead call
+        * shem_gem_from_pvr_gem().
+        */
+       struct drm_gem_shmem_object base;
+
+       /**
+        * @flags: Options set at creation-time. Some of these options apply to
+        * the creation operation itself (which are stored here for reference)
+        * with the remainder used for mapping options to both the device and
+        * CPU. These are used every time this object is mapped, but may be
+        * changed after creation.
+        *
+        * Must be a combination of DRM_PVR_BO_* and/or PVR_BO_* flags.
+        *
+        * .. note::
+        *
+        *    This member is declared const to indicate that none of these
+        *    options may change or be changed throughout the object's
+        *    lifetime.
+        */
+       u64 flags;
+
+};
+
+static_assert(offsetof(struct pvr_gem_object, base) == 0,
+             "offsetof(struct pvr_gem_object, base) not zero");
+
+#define shmem_gem_from_pvr_gem(pvr_obj) (&(pvr_obj)->base)
+
+#define shmem_gem_to_pvr_gem(shmem_obj) container_of_const(shmem_obj, struct pvr_gem_object, base)
+
+#define gem_from_pvr_gem(pvr_obj) (&(pvr_obj)->base.base)
+
+#define gem_to_pvr_gem(gem_obj) container_of_const(gem_obj, struct pvr_gem_object, base.base)
+
+/* Functions defined in pvr_gem.c */
+
+struct drm_gem_object *pvr_gem_create_object(struct drm_device *drm_dev, size_t size);
+
+struct pvr_gem_object *pvr_gem_object_create(struct pvr_device *pvr_dev,
+                                            size_t size, u64 flags);
+
+int pvr_gem_object_into_handle(struct pvr_gem_object *pvr_obj,
+                              struct pvr_file *pvr_file, u32 *handle);
+struct pvr_gem_object *pvr_gem_object_from_handle(struct pvr_file *pvr_file,
+                                                 u32 handle);
+
+static __always_inline struct sg_table *
+pvr_gem_object_get_pages_sgt(struct pvr_gem_object *pvr_obj)
+{
+       return drm_gem_shmem_get_pages_sgt(shmem_gem_from_pvr_gem(pvr_obj));
+}
+
+void *pvr_gem_object_vmap(struct pvr_gem_object *pvr_obj);
+void pvr_gem_object_vunmap(struct pvr_gem_object *pvr_obj);
+
+int pvr_gem_get_dma_addr(struct pvr_gem_object *pvr_obj, u32 offset,
+                        dma_addr_t *dma_addr_out);
+
+/**
+ * pvr_gem_object_get() - Acquire reference on pvr_gem_object
+ * @pvr_obj: Pointer to object to acquire reference on.
+ */
+static __always_inline void
+pvr_gem_object_get(struct pvr_gem_object *pvr_obj)
+{
+       drm_gem_object_get(gem_from_pvr_gem(pvr_obj));
+}
+
+/**
+ * pvr_gem_object_put() - Release reference on pvr_gem_object
+ * @pvr_obj: Pointer to object to release reference on.
+ */
+static __always_inline void
+pvr_gem_object_put(struct pvr_gem_object *pvr_obj)
+{
+       drm_gem_object_put(gem_from_pvr_gem(pvr_obj));
+}
+
+static __always_inline size_t
+pvr_gem_object_size(struct pvr_gem_object *pvr_obj)
+{
+       return gem_from_pvr_gem(pvr_obj)->size;
+}
+
+#endif /* PVR_GEM_H */
diff --git a/drivers/gpu/drm/imagination/pvr_mmu.c b/drivers/gpu/drm/imagination/pvr_mmu.c
new file mode 100644 (file)
index 0000000..7f77e03
--- /dev/null
@@ -0,0 +1,2573 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_mmu.h"
+
+#include "pvr_device.h"
+#include "pvr_fw.h"
+#include "pvr_gem.h"
+#include "pvr_rogue_fwif.h"
+#include "pvr_rogue_mmu_defs.h"
+
+#include <drm/drm_drv.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
+#include <linux/kmemleak.h>
+#include <linux/minmax.h>
+#include <linux/sizes.h>
+
+#define PVR_SHIFT_FROM_SIZE(size_) (__builtin_ctzll(size_))
+#define PVR_MASK_FROM_SIZE(size_) (~((size_) - U64_C(1)))
+
+/*
+ * The value of the device page size (%PVR_DEVICE_PAGE_SIZE) is currently
+ * pegged to the host page size (%PAGE_SIZE). This chunk of macro goodness both
+ * ensures that the selected host page size corresponds to a valid device page
+ * size and sets up values needed by the MMU code below.
+ */
+#if (PVR_DEVICE_PAGE_SIZE == SZ_4K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_4KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_4KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_4KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_16K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_16KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_16KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_16KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_64K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_64KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_64KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_64KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_256K)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_256KB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_256KB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_256KB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_1M)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_1MB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_1MB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_1MB_RANGE_CLRMSK
+#elif (PVR_DEVICE_PAGE_SIZE == SZ_2M)
+# define ROGUE_MMUCTRL_PAGE_SIZE_X ROGUE_MMUCTRL_PAGE_SIZE_2MB
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT ROGUE_MMUCTRL_PAGE_2MB_RANGE_SHIFT
+# define ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK ROGUE_MMUCTRL_PAGE_2MB_RANGE_CLRMSK
+#else
+# error Unsupported device page size PVR_DEVICE_PAGE_SIZE
+#endif
+
+#define ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X   \
+       (ROGUE_MMUCTRL_ENTRIES_PT_VALUE >> \
+        (PVR_DEVICE_PAGE_SHIFT - PVR_SHIFT_FROM_SIZE(SZ_4K)))
+
+enum pvr_mmu_sync_level {
+       PVR_MMU_SYNC_LEVEL_NONE = -1,
+       PVR_MMU_SYNC_LEVEL_0 = 0,
+       PVR_MMU_SYNC_LEVEL_1 = 1,
+       PVR_MMU_SYNC_LEVEL_2 = 2,
+};
+
+#define PVR_MMU_SYNC_LEVEL_0_FLAGS (ROGUE_FWIF_MMUCACHEDATA_FLAGS_PT | \
+                                   ROGUE_FWIF_MMUCACHEDATA_FLAGS_INTERRUPT | \
+                                   ROGUE_FWIF_MMUCACHEDATA_FLAGS_TLB)
+#define PVR_MMU_SYNC_LEVEL_1_FLAGS (PVR_MMU_SYNC_LEVEL_0_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PD)
+#define PVR_MMU_SYNC_LEVEL_2_FLAGS (PVR_MMU_SYNC_LEVEL_1_FLAGS | ROGUE_FWIF_MMUCACHEDATA_FLAGS_PC)
+
+/**
+ * pvr_mmu_set_flush_flags() - Set MMU cache flush flags for next call to
+ *                             pvr_mmu_flush_exec().
+ * @pvr_dev: Target PowerVR device.
+ * @flags: MMU flush flags. Must be one of %PVR_MMU_SYNC_LEVEL_*_FLAGS.
+ *
+ * This function must be called following any possible change to the MMU page
+ * tables.
+ */
+static void pvr_mmu_set_flush_flags(struct pvr_device *pvr_dev, u32 flags)
+{
+       atomic_fetch_or(flags, &pvr_dev->mmu_flush_cache_flags);
+}
+
+/**
+ * pvr_mmu_flush_request_all() - Request flush of all MMU caches when
+ * subsequently calling pvr_mmu_flush_exec().
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This function must be called following any possible change to the MMU page
+ * tables.
+ */
+void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev)
+{
+       /* TODO: implement */
+}
+
+/**
+ * pvr_mmu_flush_exec() - Execute a flush of all MMU caches previously
+ * requested.
+ * @pvr_dev: Target PowerVR device.
+ * @wait: Do not return until the flush is completed.
+ *
+ * This function must be called prior to submitting any new GPU job. The flush
+ * will complete before the jobs are scheduled, so this can be called once after
+ * a series of maps. However, a single unmap should always be immediately
+ * followed by a flush and it should be explicitly waited by setting @wait.
+ *
+ * As a failure to flush the MMU caches could risk memory corruption, if the
+ * flush fails (implying the firmware is not responding) then the GPU device is
+ * marked as lost.
+ *
+ * Returns:
+ *  * 0 on success when @wait is true, or
+ *  * -%EIO if the device is unavailable, or
+ *  * Any error encountered while submitting the flush command via the KCCB.
+ */
+int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait)
+{
+       /* TODO: implement */
+       return -ENODEV;
+}
+
+/**
+ * DOC: PowerVR Virtual Memory Handling
+ */
+/**
+ * DOC: PowerVR Virtual Memory Handling (constants)
+ *
+ * .. c:macro:: PVR_IDX_INVALID
+ *
+ *    Default value for a u16-based index.
+ *
+ *    This value cannot be zero, since zero is a valid index value.
+ */
+#define PVR_IDX_INVALID ((u16)(-1))
+
+/**
+ * DOC: MMU backing pages
+ */
+/**
+ * DOC: MMU backing pages (constants)
+ *
+ * .. c:macro:: PVR_MMU_BACKING_PAGE_SIZE
+ *
+ *    Page size of a PowerVR device's integrated MMU. The CPU page size must be
+ *    at least as large as this value for the current implementation; this is
+ *    checked at compile-time.
+ */
+#define PVR_MMU_BACKING_PAGE_SIZE SZ_4K
+static_assert(PAGE_SIZE >= PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_mmu_backing_page - Represents a single page used to back a page
+ *                              table of any level.
+ * @dma_addr: DMA address of this page.
+ * @host_ptr: CPU address of this page.
+ * @pvr_dev: The PowerVR device to which this page is associated. **For
+ *           internal use only.**
+ */
+struct pvr_mmu_backing_page {
+       dma_addr_t dma_addr;
+       void *host_ptr;
+/* private: internal use only */
+       struct page *raw_page;
+       struct pvr_device *pvr_dev;
+};
+
+/**
+ * pvr_mmu_backing_page_init() - Initialize a MMU backing page.
+ * @page: Target backing page.
+ * @pvr_dev: Target PowerVR device.
+ *
+ * This function performs three distinct operations:
+ *
+ * 1. Allocate a single page,
+ * 2. Map the page to the CPU, and
+ * 3. Map the page to DMA-space.
+ *
+ * It is expected that @page be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%ENOMEM if allocation of the backing page or mapping of the backing
+ *    page to DMA fails.
+ */
+static int
+pvr_mmu_backing_page_init(struct pvr_mmu_backing_page *page,
+                         struct pvr_device *pvr_dev)
+{
+       struct device *dev = from_pvr_device(pvr_dev)->dev;
+
+       struct page *raw_page;
+       int err;
+
+       dma_addr_t dma_addr;
+       void *host_ptr;
+
+       raw_page = alloc_page(__GFP_ZERO | GFP_KERNEL);
+       if (!raw_page)
+               return -ENOMEM;
+
+       host_ptr = vmap(&raw_page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+       if (!host_ptr) {
+               err = -ENOMEM;
+               goto err_free_page;
+       }
+
+       dma_addr = dma_map_page(dev, raw_page, 0, PVR_MMU_BACKING_PAGE_SIZE,
+                               DMA_TO_DEVICE);
+       if (dma_mapping_error(dev, dma_addr)) {
+               err = -ENOMEM;
+               goto err_unmap_page;
+       }
+
+       page->dma_addr = dma_addr;
+       page->host_ptr = host_ptr;
+       page->pvr_dev = pvr_dev;
+       page->raw_page = raw_page;
+       kmemleak_alloc(page->host_ptr, PAGE_SIZE, 1, GFP_KERNEL);
+
+       return 0;
+
+err_unmap_page:
+       vunmap(host_ptr);
+
+err_free_page:
+       __free_page(raw_page);
+
+       return err;
+}
+
+/**
+ * pvr_mmu_backing_page_fini() - Teardown a MMU backing page.
+ * @page: Target backing page.
+ *
+ * This function performs the mirror operations to pvr_mmu_backing_page_init(),
+ * in reverse order:
+ *
+ * 1. Unmap the page from DMA-space,
+ * 2. Unmap the page from the CPU, and
+ * 3. Free the page.
+ *
+ * It also zeros @page.
+ *
+ * It is a no-op to call this function a second (or further) time on any @page.
+ */
+static void
+pvr_mmu_backing_page_fini(struct pvr_mmu_backing_page *page)
+{
+       struct device *dev = from_pvr_device(page->pvr_dev)->dev;
+
+       /* Do nothing if no allocation is present. */
+       if (!page->pvr_dev)
+               return;
+
+       dma_unmap_page(dev, page->dma_addr, PVR_MMU_BACKING_PAGE_SIZE,
+                      DMA_TO_DEVICE);
+
+       kmemleak_free(page->host_ptr);
+       vunmap(page->host_ptr);
+
+       __free_page(page->raw_page);
+
+       memset(page, 0, sizeof(*page));
+}
+
+/**
+ * pvr_mmu_backing_page_sync() - Flush a MMU backing page from the CPU to the
+ *                              device.
+ * @page: Target backing page.
+ *
+ * .. caution::
+ *
+ *    **This is potentially an expensive function call.** Only call
+ *    pvr_mmu_backing_page_sync() once you're sure you have no more changes to
+ *    make to the backing page in the immediate future.
+ */
+static void
+pvr_mmu_backing_page_sync(struct pvr_mmu_backing_page *page, u32 flags)
+{
+       struct pvr_device *pvr_dev = page->pvr_dev;
+       struct device *dev;
+
+       /*
+        * Do nothing if no allocation is present. This may be the case if
+        * we are unmapping pages.
+        */
+       if (!pvr_dev)
+               return;
+
+       dev = from_pvr_device(pvr_dev)->dev;
+
+       dma_sync_single_for_device(dev, page->dma_addr,
+                                  PVR_MMU_BACKING_PAGE_SIZE, DMA_TO_DEVICE);
+
+       pvr_mmu_set_flush_flags(pvr_dev, flags);
+}
+
+/**
+ * DOC: Raw page tables
+ */
+
+#define PVR_PAGE_TABLE_TYPEOF_ENTRY(level_) \
+       typeof_member(struct pvr_page_table_l##level_##_entry_raw, val)
+
+#define PVR_PAGE_TABLE_FIELD_GET(level_, name_, field_, entry_)           \
+       (((entry_).val &                                           \
+         ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK) >> \
+        ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT)
+
+#define PVR_PAGE_TABLE_FIELD_PREP(level_, name_, field_, val_)            \
+       ((((PVR_PAGE_TABLE_TYPEOF_ENTRY(level_))(val_))            \
+         << ROGUE_MMUCTRL_##name_##_DATA_##field_##_SHIFT) & \
+        ~ROGUE_MMUCTRL_##name_##_DATA_##field_##_CLRMSK)
+
+/**
+ * struct pvr_page_table_l2_entry_raw - A single entry in a level 2 page table.
+ * @val: The raw value of this entry.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE.
+ *
+ * The value stored in this structure can be decoded using the following bitmap:
+ *
+ * .. flat-table::
+ *    :widths: 1 5
+ *    :stub-columns: 1
+ *
+ *    * - 31..4
+ *      - **Level 1 Page Table Base Address:** Bits 39..12 of the L1
+ *        page table base address, which is 4KiB aligned.
+ *
+ *    * - 3..2
+ *      - *(reserved)*
+ *
+ *    * - 1
+ *      - **Pending:** When valid bit is not set, indicates that a valid
+ *        entry is pending and the MMU should wait for the driver to map
+ *        the entry. This is used to support page demand mapping of
+ *        memory.
+ *
+ *    * - 0
+ *      - **Valid:** Indicates that the entry contains a valid L1 page
+ *        table. If the valid bit is not set, then an attempted use of
+ *        the page would result in a page fault.
+ */
+struct pvr_page_table_l2_entry_raw {
+       u32 val;
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l2_entry_raw) * 8 ==
+             ROGUE_MMUCTRL_ENTRY_SIZE_PC_VALUE);
+
+static bool
+pvr_page_table_l2_entry_raw_is_valid(struct pvr_page_table_l2_entry_raw entry)
+{
+       return PVR_PAGE_TABLE_FIELD_GET(2, PC, VALID, entry);
+}
+
+/**
+ * pvr_page_table_l2_entry_raw_set() - Write a valid entry into a raw level 2
+ *                                     page table.
+ * @entry: Target raw level 2 page table entry.
+ * @child_table_dma_addr: DMA address of the level 1 page table to be
+ *                        associated with @entry.
+ *
+ * When calling this function, @child_table_dma_addr must be a valid DMA
+ * address and a multiple of %ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSIZE.
+ */
+static void
+pvr_page_table_l2_entry_raw_set(struct pvr_page_table_l2_entry_raw *entry,
+                               dma_addr_t child_table_dma_addr)
+{
+       child_table_dma_addr >>= ROGUE_MMUCTRL_PC_DATA_PD_BASE_ALIGNSHIFT;
+
+       WRITE_ONCE(entry->val,
+                  PVR_PAGE_TABLE_FIELD_PREP(2, PC, VALID, true) |
+                  PVR_PAGE_TABLE_FIELD_PREP(2, PC, ENTRY_PENDING, false) |
+                  PVR_PAGE_TABLE_FIELD_PREP(2, PC, PD_BASE, child_table_dma_addr));
+}
+
+static void
+pvr_page_table_l2_entry_raw_clear(struct pvr_page_table_l2_entry_raw *entry)
+{
+       WRITE_ONCE(entry->val, 0);
+}
+
+/**
+ * struct pvr_page_table_l1_entry_raw - A single entry in a level 1 page table.
+ * @val: The raw value of this entry.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE.
+ *
+ * The value stored in this structure can be decoded using the following bitmap:
+ *
+ * .. flat-table::
+ *    :widths: 1 5
+ *    :stub-columns: 1
+ *
+ *    * - 63..41
+ *      - *(reserved)*
+ *
+ *    * - 40
+ *      - **Pending:** When valid bit is not set, indicates that a valid entry
+ *        is pending and the MMU should wait for the driver to map the entry.
+ *        This is used to support page demand mapping of memory.
+ *
+ *    * - 39..5
+ *      - **Level 0 Page Table Base Address:** The way this value is
+ *        interpreted depends on the page size. Bits not specified in the
+ *        table below (e.g. bits 11..5 for page size 4KiB) should be
+ *        considered reserved.
+ *
+ *        This table shows the bits used in an L1 page table entry to
+ *        represent the Physical Table Base Address for a given Page Size.
+ *        Since each L1 page table entry covers 2MiB of address space, the
+ *        maximum page size is 2MiB.
+ *
+ *        .. flat-table::
+ *           :widths: 1 1 1 1
+ *           :header-rows: 1
+ *           :stub-columns: 1
+ *
+ *           * - Page size
+ *             - L0 page table base address bits
+ *             - Number of L0 page table entries
+ *             - Size of L0 page table
+ *
+ *           * - 4KiB
+ *             - 39..12
+ *             - 512
+ *             - 4KiB
+ *
+ *           * - 16KiB
+ *             - 39..10
+ *             - 128
+ *             - 1KiB
+ *
+ *           * - 64KiB
+ *             - 39..8
+ *             - 32
+ *             - 256B
+ *
+ *           * - 256KiB
+ *             - 39..6
+ *             - 8
+ *             - 64B
+ *
+ *           * - 1MiB
+ *             - 39..5 (4 = '0')
+ *             - 2
+ *             - 16B
+ *
+ *           * - 2MiB
+ *             - 39..5 (4..3 = '00')
+ *             - 1
+ *             - 8B
+ *
+ *    * - 4
+ *      - *(reserved)*
+ *
+ *    * - 3..1
+ *      - **Page Size:** Sets the page size, from 4KiB to 2MiB.
+ *
+ *    * - 0
+ *      - **Valid:** Indicates that the entry contains a valid L0 page table.
+ *        If the valid bit is not set, then an attempted use of the page would
+ *        result in a page fault.
+ */
+struct pvr_page_table_l1_entry_raw {
+       u64 val;
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l1_entry_raw) * 8 ==
+             ROGUE_MMUCTRL_ENTRY_SIZE_PD_VALUE);
+
+static bool
+pvr_page_table_l1_entry_raw_is_valid(struct pvr_page_table_l1_entry_raw entry)
+{
+       return PVR_PAGE_TABLE_FIELD_GET(1, PD, VALID, entry);
+}
+
+/**
+ * pvr_page_table_l1_entry_raw_set() - Write a valid entry into a raw level 1
+ *                                     page table.
+ * @entry: Target raw level 1 page table entry.
+ * @child_table_dma_addr: DMA address of the level 0 page table to be
+ *                        associated with @entry.
+ *
+ * When calling this function, @child_table_dma_addr must be a valid DMA
+ * address and a multiple of 4 KiB.
+ */
+static void
+pvr_page_table_l1_entry_raw_set(struct pvr_page_table_l1_entry_raw *entry,
+                               dma_addr_t child_table_dma_addr)
+{
+       WRITE_ONCE(entry->val,
+                  PVR_PAGE_TABLE_FIELD_PREP(1, PD, VALID, true) |
+                  PVR_PAGE_TABLE_FIELD_PREP(1, PD, ENTRY_PENDING, false) |
+                  PVR_PAGE_TABLE_FIELD_PREP(1, PD, PAGE_SIZE, ROGUE_MMUCTRL_PAGE_SIZE_X) |
+                  /*
+                   * The use of a 4K-specific macro here is correct. It is
+                   * a future optimization to allocate sub-host-page-sized
+                   * blocks for individual tables, so the condition that any
+                   * page table address is aligned to the size of the
+                   * largest (a 4KB) table currently holds.
+                   */
+                  (child_table_dma_addr & ~ROGUE_MMUCTRL_PT_BASE_4KB_RANGE_CLRMSK));
+}
+
+static void
+pvr_page_table_l1_entry_raw_clear(struct pvr_page_table_l1_entry_raw *entry)
+{
+       WRITE_ONCE(entry->val, 0);
+}
+
+/**
+ * struct pvr_page_table_l0_entry_raw - A single entry in a level 0 page table.
+ * @val: The raw value of this entry.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE.
+ *
+ * The value stored in this structure can be decoded using the following bitmap:
+ *
+ * .. flat-table::
+ *    :widths: 1 5
+ *    :stub-columns: 1
+ *
+ *    * - 63
+ *      - *(reserved)*
+ *
+ *    * - 62
+ *      - **PM/FW Protect:** Indicates a protected region which only the
+ *        Parameter Manager (PM) or firmware processor can write to.
+ *
+ *    * - 61..40
+ *      - **VP Page (High):** Virtual-physical page used for Parameter Manager
+ *        (PM) memory. This field is only used if the additional level of PB
+ *        virtualization is enabled. The VP Page field is needed by the PM in
+ *        order to correctly reconstitute the free lists after render
+ *        completion. This (High) field holds bits 39..18 of the value; the
+ *        Low field holds bits 17..12. Bits 11..0 are always zero because the
+ *        value is always aligned to the 4KiB page size.
+ *
+ *    * - 39..12
+ *      - **Physical Page Address:** The way this value is interpreted depends
+ *        on the page size. Bits not specified in the table below (e.g. bits
+ *        20..12 for page size 2MiB) should be considered reserved.
+ *
+ *        This table shows the bits used in an L0 page table entry to represent
+ *        the Physical Page Address for a given page size (as defined in the
+ *        associated L1 page table entry).
+ *
+ *        .. flat-table::
+ *           :widths: 1 1
+ *           :header-rows: 1
+ *           :stub-columns: 1
+ *
+ *           * - Page size
+ *             - Physical address bits
+ *
+ *           * - 4KiB
+ *             - 39..12
+ *
+ *           * - 16KiB
+ *             - 39..14
+ *
+ *           * - 64KiB
+ *             - 39..16
+ *
+ *           * - 256KiB
+ *             - 39..18
+ *
+ *           * - 1MiB
+ *             - 39..20
+ *
+ *           * - 2MiB
+ *             - 39..21
+ *
+ *    * - 11..6
+ *      - **VP Page (Low):** Continuation of VP Page (High).
+ *
+ *    * - 5
+ *      - **Pending:** When valid bit is not set, indicates that a valid entry
+ *        is pending and the MMU should wait for the driver to map the entry.
+ *        This is used to support page demand mapping of memory.
+ *
+ *    * - 4
+ *      - **PM Src:** Set on Parameter Manager (PM) allocated page table
+ *        entries when indicated by the PM. Note that this bit will only be set
+ *        by the PM, not by the device driver.
+ *
+ *    * - 3
+ *      - **SLC Bypass Control:** Specifies requests to this page should bypass
+ *        the System Level Cache (SLC), if enabled in SLC configuration.
+ *
+ *    * - 2
+ *      - **Cache Coherency:** Indicates that the page is coherent (i.e. it
+ *        does not require a cache flush between operations on the CPU and the
+ *        device).
+ *
+ *    * - 1
+ *      - **Read Only:** If set, this bit indicates that the page is read only.
+ *        An attempted write to this page would result in a write-protection
+ *        fault.
+ *
+ *    * - 0
+ *      - **Valid:** Indicates that the entry contains a valid page. If the
+ *        valid bit is not set, then an attempted use of the page would result
+ *        in a page fault.
+ */
+struct pvr_page_table_l0_entry_raw {
+       u64 val;
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l0_entry_raw) * 8 ==
+             ROGUE_MMUCTRL_ENTRY_SIZE_PT_VALUE);
+
+/**
+ * struct pvr_page_flags_raw - The configurable flags from a single entry in a
+ *                             level 0 page table.
+ * @val: The raw value of these flags. Since these are a strict subset of
+ *       &struct pvr_page_table_l0_entry_raw; use that type for our member here.
+ *
+ * The flags stored in this type are: PM/FW Protect; SLC Bypass Control; Cache
+ * Coherency, and Read Only (bits 62, 3, 2 and 1 respectively).
+ *
+ * This type should never be instantiated directly; instead use
+ * pvr_page_flags_raw_create() to ensure only valid bits of @val are set.
+ */
+struct pvr_page_flags_raw {
+       struct pvr_page_table_l0_entry_raw val;
+} __packed;
+static_assert(sizeof(struct pvr_page_flags_raw) ==
+             sizeof(struct pvr_page_table_l0_entry_raw));
+
+static bool
+pvr_page_table_l0_entry_raw_is_valid(struct pvr_page_table_l0_entry_raw entry)
+{
+       return PVR_PAGE_TABLE_FIELD_GET(0, PT, VALID, entry);
+}
+
+/**
+ * pvr_page_table_l0_entry_raw_set() - Write a valid entry into a raw level 0
+ *                                     page table.
+ * @entry: Target raw level 0 page table entry.
+ * @dma_addr: DMA address of the physical page to be associated with @entry.
+ * @flags: Options to be set on @entry.
+ *
+ * When calling this function, @child_table_dma_addr must be a valid DMA
+ * address and a multiple of %PVR_DEVICE_PAGE_SIZE.
+ *
+ * The @flags parameter is directly assigned into @entry. It is the callers
+ * responsibility to ensure that only bits specified in
+ * &struct pvr_page_flags_raw are set in @flags.
+ */
+static void
+pvr_page_table_l0_entry_raw_set(struct pvr_page_table_l0_entry_raw *entry,
+                               dma_addr_t dma_addr,
+                               struct pvr_page_flags_raw flags)
+{
+       WRITE_ONCE(entry->val, PVR_PAGE_TABLE_FIELD_PREP(0, PT, VALID, true) |
+                              PVR_PAGE_TABLE_FIELD_PREP(0, PT, ENTRY_PENDING, false) |
+                              (dma_addr & ~ROGUE_MMUCTRL_PAGE_X_RANGE_CLRMSK) |
+                              flags.val.val);
+}
+
+static void
+pvr_page_table_l0_entry_raw_clear(struct pvr_page_table_l0_entry_raw *entry)
+{
+       WRITE_ONCE(entry->val, 0);
+}
+
+/**
+ * pvr_page_flags_raw_create() - Initialize the flag bits of a raw level 0 page
+ *                               table entry.
+ * @read_only: This page is read-only (see: Read Only).
+ * @cache_coherent: This page does not require cache flushes (see: Cache
+ *                  Coherency).
+ * @slc_bypass: This page bypasses the device cache (see: SLC Bypass Control).
+ * @pm_fw_protect: This page is only for use by the firmware or Parameter
+ *                 Manager (see PM/FW Protect).
+ *
+ * For more details on the use of these four options, see their respective
+ * entries in the table under &struct pvr_page_table_l0_entry_raw.
+ *
+ * Return:
+ * A new &struct pvr_page_flags_raw instance which can be passed directly to
+ * pvr_page_table_l0_entry_raw_set() or pvr_page_table_l0_insert().
+ */
+static struct pvr_page_flags_raw
+pvr_page_flags_raw_create(bool read_only, bool cache_coherent, bool slc_bypass,
+                         bool pm_fw_protect)
+{
+       struct pvr_page_flags_raw flags;
+
+       flags.val.val =
+               PVR_PAGE_TABLE_FIELD_PREP(0, PT, READ_ONLY, read_only) |
+               PVR_PAGE_TABLE_FIELD_PREP(0, PT, CC, cache_coherent) |
+               PVR_PAGE_TABLE_FIELD_PREP(0, PT, SLC_BYPASS_CTRL, slc_bypass) |
+               PVR_PAGE_TABLE_FIELD_PREP(0, PT, PM_META_PROTECT, pm_fw_protect);
+
+       return flags;
+}
+
+/**
+ * struct pvr_page_table_l2_raw - The raw data of a level 2 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ */
+struct pvr_page_table_l2_raw {
+       /** @entries: The raw values of this table. */
+       struct pvr_page_table_l2_entry_raw
+               entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l2_raw) == PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_page_table_l1_raw - The raw data of a level 1 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ */
+struct pvr_page_table_l1_raw {
+       /** @entries: The raw values of this table. */
+       struct pvr_page_table_l1_entry_raw
+               entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l1_raw) == PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * struct pvr_page_table_l0_raw - The raw data of a level 0 page table.
+ *
+ * This type is a structure for type-checking purposes. At compile-time, its
+ * size is checked against %PVR_MMU_BACKING_PAGE_SIZE.
+ *
+ * .. caution::
+ *
+ *    The size of level 0 page tables is variable depending on the page size
+ *    specified in the associated level 1 page table entry. Since the device
+ *    page size in use is pegged to the host page size, it cannot vary at
+ *    runtime. This structure is therefore only defined to contain the required
+ *    number of entries for the current device page size. **You should never
+ *    read or write beyond the last supported entry.**
+ */
+struct pvr_page_table_l0_raw {
+       /** @entries: The raw values of this table. */
+       struct pvr_page_table_l0_entry_raw
+               entries[ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X];
+} __packed;
+static_assert(sizeof(struct pvr_page_table_l0_raw) <= PVR_MMU_BACKING_PAGE_SIZE);
+
+/**
+ * DOC: Mirror page tables
+ */
+
+/*
+ * We pre-declare these types because they cross-depend on pointers to each
+ * other.
+ */
+struct pvr_page_table_l1;
+struct pvr_page_table_l0;
+
+/**
+ * struct pvr_page_table_l2 - A wrapped level 2 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l2_get_raw().
+ * Alternatively to access a raw entry directly, use
+ * pvr_page_table_l2_get_entry_raw().
+ *
+ * A level 2 page table forms the root of the page table tree structure, so
+ * this type has no &parent or &parent_idx members.
+ */
+struct pvr_page_table_l2 {
+       /**
+        * @entries: The children of this node in the page table tree
+        * structure. These are also mirror tables. The indexing of this array
+        * is identical to that of the raw equivalent
+        * (&pvr_page_table_l1_raw.entries).
+        */
+       struct pvr_page_table_l1 *entries[ROGUE_MMUCTRL_ENTRIES_PC_VALUE];
+
+       /**
+        * @backing_page: A handle to the memory which holds the raw
+        * equivalent of this table. **For internal use only.**
+        */
+       struct pvr_mmu_backing_page backing_page;
+
+       /**
+        * @entry_count: The current number of valid entries (that we know of)
+        * in this table. This value is essentially a refcount - the table is
+        * destroyed when this value is decremented to zero by
+        * pvr_page_table_l2_remove().
+        */
+       u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l2_init() - Initialize a level 2 page table.
+ * @table: Target level 2 page table.
+ * @pvr_dev: Target PowerVR device
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while intializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l2_init(struct pvr_page_table_l2 *table,
+                      struct pvr_device *pvr_dev)
+{
+       return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l2_fini() - Teardown a level 2 page table.
+ * @table: Target level 2 page table.
+ *
+ * It is an error to attempt to use @table after calling this function.
+ */
+static void
+pvr_page_table_l2_fini(struct pvr_page_table_l2 *table)
+{
+       pvr_mmu_backing_page_fini(&table->backing_page);
+}
+
+/**
+ * pvr_page_table_l2_sync() - Flush a level 2 page table from the CPU to the
+ *                            device.
+ * @table: Target level 2 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l2_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child level 1 page tables of @table also need to be flushed, this should
+ * be done first using pvr_page_table_l1_sync() *before* calling this function.
+ */
+static void
+pvr_page_table_l2_sync(struct pvr_page_table_l2 *table)
+{
+       pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_2_FLAGS);
+}
+
+/**
+ * pvr_page_table_l2_get_raw() - Access the raw equivalent of a mirror level 2
+ *                               page table.
+ * @table: Target level 2 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l2_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l2_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l2_raw *
+pvr_page_table_l2_get_raw(struct pvr_page_table_l2 *table)
+{
+       return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l2_get_entry_raw() - Access an entry from the raw equivalent
+ *                                     of a mirror level 2 page table.
+ * @table: Target level 2 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 2 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l2_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer.
+ *
+ * Return:
+ * A pointer to the requested raw level 2 page table entry.
+ */
+static struct pvr_page_table_l2_entry_raw *
+pvr_page_table_l2_get_entry_raw(struct pvr_page_table_l2 *table, u16 idx)
+{
+       return &pvr_page_table_l2_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l2_entry_is_valid() - Check if a level 2 page table entry is
+ *                                      marked as valid.
+ * @table: Target level 2 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ */
+static bool
+pvr_page_table_l2_entry_is_valid(struct pvr_page_table_l2 *table, u16 idx)
+{
+       struct pvr_page_table_l2_entry_raw entry_raw =
+               *pvr_page_table_l2_get_entry_raw(table, idx);
+
+       return pvr_page_table_l2_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_page_table_l1 - A wrapped level 1 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l1_get_raw().
+ * Alternatively to access a raw entry directly, use
+ * pvr_page_table_l1_get_entry_raw().
+ */
+struct pvr_page_table_l1 {
+       /**
+        * @entries: The children of this node in the page table tree
+        * structure. These are also mirror tables. The indexing of this array
+        * is identical to that of the raw equivalent
+        * (&pvr_page_table_l0_raw.entries).
+        */
+       struct pvr_page_table_l0 *entries[ROGUE_MMUCTRL_ENTRIES_PD_VALUE];
+
+       /**
+        * @backing_page: A handle to the memory which holds the raw
+        * equivalent of this table. **For internal use only.**
+        */
+       struct pvr_mmu_backing_page backing_page;
+
+       union {
+               /**
+                * @parent: The parent of this node in the page table tree structure.
+                *
+                * This is also a mirror table.
+                *
+                * Only valid when the L1 page table is active. When the L1 page table
+                * has been removed and queued for destruction, the next_free field
+                * should be used instead.
+                */
+               struct pvr_page_table_l2 *parent;
+
+               /**
+                * @next_free: Pointer to the next L1 page table to take/free.
+                *
+                * Used to form a linked list of L1 page tables. This is used
+                * when preallocating tables and when the page table has been
+                * removed and queued for destruction.
+                */
+               struct pvr_page_table_l1 *next_free;
+       };
+
+       /**
+        * @parent_idx: The index of the entry in the parent table (see
+        * @parent) which corresponds to this table.
+        */
+       u16 parent_idx;
+
+       /**
+        * @entry_count: The current number of valid entries (that we know of)
+        * in this table. This value is essentially a refcount - the table is
+        * destroyed when this value is decremented to zero by
+        * pvr_page_table_l1_remove().
+        */
+       u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l1_init() - Initialize a level 1 page table.
+ * @table: Target level 1 page table.
+ * @pvr_dev: Target PowerVR device
+ *
+ * When this function returns successfully, @table is still not considered
+ * valid. It must be inserted into the page table tree structure with
+ * pvr_page_table_l2_insert() before it is ready for use.
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while intializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l1_init(struct pvr_page_table_l1 *table,
+                      struct pvr_device *pvr_dev)
+{
+       table->parent_idx = PVR_IDX_INVALID;
+
+       return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l1_free() - Teardown a level 1 page table.
+ * @table: Target level 1 page table.
+ *
+ * It is an error to attempt to use @table after calling this function, even
+ * indirectly. This includes calling pvr_page_table_l2_remove(), which must
+ * be called *before* pvr_page_table_l1_free().
+ */
+static void
+pvr_page_table_l1_free(struct pvr_page_table_l1 *table)
+{
+       pvr_mmu_backing_page_fini(&table->backing_page);
+       kfree(table);
+}
+
+/**
+ * pvr_page_table_l1_sync() - Flush a level 1 page table from the CPU to the
+ *                            device.
+ * @table: Target level 1 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l1_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child level 0 page tables of @table also need to be flushed, this should
+ * be done first using pvr_page_table_l0_sync() *before* calling this function.
+ */
+static void
+pvr_page_table_l1_sync(struct pvr_page_table_l1 *table)
+{
+       pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_1_FLAGS);
+}
+
+/**
+ * pvr_page_table_l1_get_raw() - Access the raw equivalent of a mirror level 1
+ *                               page table.
+ * @table: Target level 1 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l1_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l1_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l1_raw *
+pvr_page_table_l1_get_raw(struct pvr_page_table_l1 *table)
+{
+       return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l1_get_entry_raw() - Access an entry from the raw equivalent
+ *                                     of a mirror level 1 page table.
+ * @table: Target level 1 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 1 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l1_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer.
+ *
+ * Return:
+ * A pointer to the requested raw level 1 page table entry.
+ */
+static struct pvr_page_table_l1_entry_raw *
+pvr_page_table_l1_get_entry_raw(struct pvr_page_table_l1 *table, u16 idx)
+{
+       return &pvr_page_table_l1_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l1_entry_is_valid() - Check if a level 1 page table entry is
+ *                                      marked as valid.
+ * @table: Target level 1 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ */
+static bool
+pvr_page_table_l1_entry_is_valid(struct pvr_page_table_l1 *table, u16 idx)
+{
+       struct pvr_page_table_l1_entry_raw entry_raw =
+               *pvr_page_table_l1_get_entry_raw(table, idx);
+
+       return pvr_page_table_l1_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_page_table_l0 - A wrapped level 0 page table.
+ *
+ * To access the raw part of this table, use pvr_page_table_l0_get_raw().
+ * Alternatively to access a raw entry directly, use
+ * pvr_page_table_l0_get_entry_raw().
+ *
+ * There is no mirror representation of an individual page, so this type has no
+ * &entries member.
+ */
+struct pvr_page_table_l0 {
+       /**
+        * @backing_page: A handle to the memory which holds the raw
+        * equivalent of this table. **For internal use only.**
+        */
+       struct pvr_mmu_backing_page backing_page;
+
+       union {
+               /**
+                * @parent: The parent of this node in the page table tree structure.
+                *
+                * This is also a mirror table.
+                *
+                * Only valid when the L0 page table is active. When the L0 page table
+                * has been removed and queued for destruction, the next_free field
+                * should be used instead.
+                */
+               struct pvr_page_table_l1 *parent;
+
+               /**
+                * @next_free: Pointer to the next L0 page table to take/free.
+                *
+                * Used to form a linked list of L0 page tables. This is used
+                * when preallocating tables and when the page table has been
+                * removed and queued for destruction.
+                */
+               struct pvr_page_table_l0 *next_free;
+       };
+
+       /**
+        * @parent_idx: The index of the entry in the parent table (see
+        * @parent) which corresponds to this table.
+        */
+       u16 parent_idx;
+
+       /**
+        * @entry_count: The current number of valid entries (that we know of)
+        * in this table. This value is essentially a refcount - the table is
+        * destroyed when this value is decremented to zero by
+        * pvr_page_table_l0_remove().
+        */
+       u16 entry_count;
+};
+
+/**
+ * pvr_page_table_l0_init() - Initialize a level 0 page table.
+ * @table: Target level 0 page table.
+ * @pvr_dev: Target PowerVR device
+ *
+ * When this function returns successfully, @table is still not considered
+ * valid. It must be inserted into the page table tree structure with
+ * pvr_page_table_l1_insert() before it is ready for use.
+ *
+ * It is expected that @table be zeroed (e.g. from kzalloc()) before calling
+ * this function.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while intializing &table->backing_page using
+ *    pvr_mmu_backing_page_init().
+ */
+static int
+pvr_page_table_l0_init(struct pvr_page_table_l0 *table,
+                      struct pvr_device *pvr_dev)
+{
+       table->parent_idx = PVR_IDX_INVALID;
+
+       return pvr_mmu_backing_page_init(&table->backing_page, pvr_dev);
+}
+
+/**
+ * pvr_page_table_l0_free() - Teardown a level 0 page table.
+ * @table: Target level 0 page table.
+ *
+ * It is an error to attempt to use @table after calling this function, even
+ * indirectly. This includes calling pvr_page_table_l1_remove(), which must
+ * be called *before* pvr_page_table_l0_free().
+ */
+static void
+pvr_page_table_l0_free(struct pvr_page_table_l0 *table)
+{
+       pvr_mmu_backing_page_fini(&table->backing_page);
+       kfree(table);
+}
+
+/**
+ * pvr_page_table_l0_sync() - Flush a level 0 page table from the CPU to the
+ *                            device.
+ * @table: Target level 0 page table.
+ *
+ * This is just a thin wrapper around pvr_mmu_backing_page_sync(), so the
+ * warning there applies here too: **Only call pvr_page_table_l0_sync() once
+ * you're sure you have no more changes to make to** @table **in the immediate
+ * future.**
+ *
+ * If child pages of @table also need to be flushed, this should be done first
+ * using a DMA sync function (e.g. dma_sync_sg_for_device()) *before* calling
+ * this function.
+ */
+static void
+pvr_page_table_l0_sync(struct pvr_page_table_l0 *table)
+{
+       pvr_mmu_backing_page_sync(&table->backing_page, PVR_MMU_SYNC_LEVEL_0_FLAGS);
+}
+
+/**
+ * pvr_page_table_l0_get_raw() - Access the raw equivalent of a mirror level 0
+ *                               page table.
+ * @table: Target level 0 page table.
+ *
+ * Essentially returns the CPU address of the raw equivalent of @table, cast to
+ * a &struct pvr_page_table_l0_raw pointer.
+ *
+ * You probably want to call pvr_page_table_l0_get_entry_raw() instead.
+ *
+ * Return:
+ * The raw equivalent of @table.
+ */
+static struct pvr_page_table_l0_raw *
+pvr_page_table_l0_get_raw(struct pvr_page_table_l0 *table)
+{
+       return table->backing_page.host_ptr;
+}
+
+/**
+ * pvr_page_table_l0_get_entry_raw() - Access an entry from the raw equivalent
+ *                                     of a mirror level 0 page table.
+ * @table: Target level 0 page table.
+ * @idx: Index of the entry to access.
+ *
+ * Technically this function returns a pointer to a slot in a raw level 0 page
+ * table, since the returned "entry" is not guaranteed to be valid. The caller
+ * must verify the validity of the entry at the returned address (perhaps using
+ * pvr_page_table_l0_entry_raw_is_valid()) before reading or overwriting it.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before dereferencing the
+ * returned pointer. This is espcially important for level 0 page tables, which
+ * can have a variable number of entries.
+ *
+ * Return:
+ * A pointer to the requested raw level 0 page table entry.
+ */
+static struct pvr_page_table_l0_entry_raw *
+pvr_page_table_l0_get_entry_raw(struct pvr_page_table_l0 *table, u16 idx)
+{
+       return &pvr_page_table_l0_get_raw(table)->entries[idx];
+}
+
+/**
+ * pvr_page_table_l0_entry_is_valid() - Check if a level 0 page table entry is
+ *                                      marked as valid.
+ * @table: Target level 0 page table.
+ * @idx: Index of the entry to check.
+ *
+ * The value of @idx is not checked here; it is the callers responsibility to
+ * ensure @idx refers to a valid index within @table before calling this
+ * function.
+ */
+static bool
+pvr_page_table_l0_entry_is_valid(struct pvr_page_table_l0 *table, u16 idx)
+{
+       struct pvr_page_table_l0_entry_raw entry_raw =
+               *pvr_page_table_l0_get_entry_raw(table, idx);
+
+       return pvr_page_table_l0_entry_raw_is_valid(entry_raw);
+}
+
+/**
+ * struct pvr_mmu_context - context holding data for operations at page
+ * catalogue level, intended for use with a VM context.
+ */
+struct pvr_mmu_context {
+       /** @pvr_dev: The PVR device associated with the owning VM context. */
+       struct pvr_device *pvr_dev;
+
+       /** @page_table_l2: The MMU table root. */
+       struct pvr_page_table_l2 page_table_l2;
+};
+
+/**
+ * struct pvr_page_table_ptr - A reference to a single physical page as indexed
+ * by the page table structure.
+ *
+ * Intended for embedding in a &struct pvr_mmu_op_context.
+ */
+struct pvr_page_table_ptr {
+       /**
+        * @l1_table: A cached handle to the level 1 page table the
+        * context is currently traversing.
+        */
+       struct pvr_page_table_l1 *l1_table;
+
+       /**
+        * @l0_table: A cached handle to the level 0 page table the
+        * context is currently traversing.
+        */
+       struct pvr_page_table_l0 *l0_table;
+
+       /**
+        * @l2_idx: Index into the level 2 page table the context is
+        * currently referencing.
+        */
+       u16 l2_idx;
+
+       /**
+        * @l1_idx: Index into the level 1 page table the context is
+        * currently referencing.
+        */
+       u16 l1_idx;
+
+       /**
+        * @l0_idx: Index into the level 0 page table the context is
+        * currently referencing.
+        */
+       u16 l0_idx;
+};
+
+/**
+ * struct pvr_mmu_op_context - context holding data for individual
+ * device-virtual mapping operations. Intended for use with a VM bind operation.
+ */
+struct pvr_mmu_op_context {
+       /** @mmu_ctx: The MMU context associated with the owning VM context. */
+       struct pvr_mmu_context *mmu_ctx;
+
+       /** @map: Data specifically for map operations. */
+       struct {
+               /**
+                * @sgt: Scatter gather table containing pages pinned for use by
+                * this context - these are currently pinned when initialising
+                * the VM bind operation.
+                */
+               struct sg_table *sgt;
+
+               /** @sgt_offset: Start address of the device-virtual mapping. */
+               u64 sgt_offset;
+
+               /**
+                * @l1_prealloc_tables: Preallocated l1 page table objects
+                * use by this context when creating a page mapping. Linked list
+                * fully created during initialisation.
+                */
+               struct pvr_page_table_l1 *l1_prealloc_tables;
+
+               /**
+                * @l0_prealloc_tables: Preallocated l0 page table objects
+                * use by this context when creating a page mapping. Linked list
+                * fully created during initialisation.
+                */
+               struct pvr_page_table_l0 *l0_prealloc_tables;
+       } map;
+
+       /** @unmap: Data specifically for unmap operations. */
+       struct {
+               /**
+                * @l1_free_tables: Collects page table objects freed by unmap
+                * ops. Linked list empty at creation.
+                */
+               struct pvr_page_table_l1 *l1_free_tables;
+
+               /**
+                * @l0_free_tables: Collects page table objects freed by unmap
+                * ops. Linked list empty at creation.
+                */
+               struct pvr_page_table_l0 *l0_free_tables;
+       } unmap;
+
+       /**
+        * @curr_page: A reference to a single physical page as indexed by the
+        * page table structure.
+        */
+       struct pvr_page_table_ptr curr_page;
+
+       /**
+        * @sync_level_required: The maximum level of the page table tree
+        * structure which has (possibly) been modified since it was last
+        * flushed to the device.
+        *
+        * This field should only be set with pvr_mmu_op_context_require_sync()
+        * or indirectly by pvr_mmu_op_context_sync_partial().
+        */
+       enum pvr_mmu_sync_level sync_level_required;
+};
+
+/**
+ * pvr_page_table_l2_insert() - Insert an entry referring to a level 1 page
+ * table into a level 2 page table.
+ * @op_ctx: Target MMU op context pointing at the entry to insert the L1 page
+ * table into.
+ * @child_table: Target level 1 page table to be referenced by the new entry.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L2 entry.
+ *
+ * It is the caller's responsibility to execute any memory barries to ensure
+ * that the creation of @child_table is ordered before the L2 entry is inserted.
+ */
+static void
+pvr_page_table_l2_insert(struct pvr_mmu_op_context *op_ctx,
+                        struct pvr_page_table_l1 *child_table)
+{
+       struct pvr_page_table_l2 *l2_table =
+               &op_ctx->mmu_ctx->page_table_l2;
+       struct pvr_page_table_l2_entry_raw *entry_raw =
+               pvr_page_table_l2_get_entry_raw(l2_table,
+                                               op_ctx->curr_page.l2_idx);
+
+       pvr_page_table_l2_entry_raw_set(entry_raw,
+                                       child_table->backing_page.dma_addr);
+
+       child_table->parent = l2_table;
+       child_table->parent_idx = op_ctx->curr_page.l2_idx;
+       l2_table->entries[op_ctx->curr_page.l2_idx] = child_table;
+       ++l2_table->entry_count;
+       op_ctx->curr_page.l1_table = child_table;
+}
+
+/**
+ * pvr_page_table_l2_remove() - Remove a level 1 page table from a level 2 page
+ * table.
+ * @op_ctx: Target MMU op context pointing at the L2 entry to remove.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L2 entry.
+ */
+static void
+pvr_page_table_l2_remove(struct pvr_mmu_op_context *op_ctx)
+{
+       struct pvr_page_table_l2 *l2_table =
+               &op_ctx->mmu_ctx->page_table_l2;
+       struct pvr_page_table_l2_entry_raw *entry_raw =
+               pvr_page_table_l2_get_entry_raw(l2_table,
+                                               op_ctx->curr_page.l1_table->parent_idx);
+
+       WARN_ON(op_ctx->curr_page.l1_table->parent != l2_table);
+
+       pvr_page_table_l2_entry_raw_clear(entry_raw);
+
+       l2_table->entries[op_ctx->curr_page.l1_table->parent_idx] = NULL;
+       op_ctx->curr_page.l1_table->parent_idx = PVR_IDX_INVALID;
+       op_ctx->curr_page.l1_table->next_free = op_ctx->unmap.l1_free_tables;
+       op_ctx->unmap.l1_free_tables = op_ctx->curr_page.l1_table;
+       op_ctx->curr_page.l1_table = NULL;
+
+       --l2_table->entry_count;
+}
+
+/**
+ * pvr_page_table_l1_insert() - Insert an entry referring to a level 0 page
+ * table into a level 1 page table.
+ * @op_ctx: Target MMU op context pointing at the entry to insert the L0 page
+ * table into.
+ * @child_table: L0 page table to insert.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L1 entry.
+ *
+ * It is the caller's responsibility to execute any memory barries to ensure
+ * that the creation of @child_table is ordered before the L1 entry is inserted.
+ */
+static void
+pvr_page_table_l1_insert(struct pvr_mmu_op_context *op_ctx,
+                        struct pvr_page_table_l0 *child_table)
+{
+       struct pvr_page_table_l1_entry_raw *entry_raw =
+               pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l1_table,
+                                               op_ctx->curr_page.l1_idx);
+
+       pvr_page_table_l1_entry_raw_set(entry_raw,
+                                       child_table->backing_page.dma_addr);
+
+       child_table->parent = op_ctx->curr_page.l1_table;
+       child_table->parent_idx = op_ctx->curr_page.l1_idx;
+       op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx] = child_table;
+       ++op_ctx->curr_page.l1_table->entry_count;
+       op_ctx->curr_page.l0_table = child_table;
+}
+
+/**
+ * pvr_page_table_l1_remove() - Remove a level 0 page table from a level 1 page
+ *                              table.
+ * @op_ctx: Target MMU op context pointing at the L1 entry to remove.
+ *
+ * If this function results in the L1 table becoming empty, it will be removed
+ * from its parent level 2 page table and destroyed.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L1 entry.
+ */
+static void
+pvr_page_table_l1_remove(struct pvr_mmu_op_context *op_ctx)
+{
+       struct pvr_page_table_l1_entry_raw *entry_raw =
+               pvr_page_table_l1_get_entry_raw(op_ctx->curr_page.l0_table->parent,
+                                               op_ctx->curr_page.l0_table->parent_idx);
+
+       WARN_ON(op_ctx->curr_page.l0_table->parent !=
+               op_ctx->curr_page.l1_table);
+
+       pvr_page_table_l1_entry_raw_clear(entry_raw);
+
+       op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l0_table->parent_idx] = NULL;
+       op_ctx->curr_page.l0_table->parent_idx = PVR_IDX_INVALID;
+       op_ctx->curr_page.l0_table->next_free = op_ctx->unmap.l0_free_tables;
+       op_ctx->unmap.l0_free_tables = op_ctx->curr_page.l0_table;
+       op_ctx->curr_page.l0_table = NULL;
+
+       if (--op_ctx->curr_page.l1_table->entry_count == 0) {
+               /* Clear the parent L2 page table entry. */
+               if (op_ctx->curr_page.l1_table->parent_idx != PVR_IDX_INVALID)
+                       pvr_page_table_l2_remove(op_ctx);
+       }
+}
+
+/**
+ * pvr_page_table_l0_insert() - Insert an entry referring to a physical page
+ * into a level 0 page table.
+ * @op_ctx: Target MMU op context pointing at the L0 entry to insert.
+ * @dma_addr: Target DMA address to be referenced by the new entry.
+ * @flags: Page options to be stored in the new entry.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L0 entry.
+ */
+static void
+pvr_page_table_l0_insert(struct pvr_mmu_op_context *op_ctx,
+                        dma_addr_t dma_addr, struct pvr_page_flags_raw flags)
+{
+       struct pvr_page_table_l0_entry_raw *entry_raw =
+               pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
+                                               op_ctx->curr_page.l0_idx);
+
+       pvr_page_table_l0_entry_raw_set(entry_raw, dma_addr, flags);
+
+       /*
+        * There is no entry to set here - we don't keep a mirror of
+        * individual pages.
+        */
+
+       ++op_ctx->curr_page.l0_table->entry_count;
+}
+
+/**
+ * pvr_page_table_l0_remove() - Remove a physical page from a level 0 page
+ * table.
+ * @op_ctx: Target MMU op context pointing at the L0 entry to remove.
+ *
+ * If this function results in the L0 table becoming empty, it will be removed
+ * from its parent L1 page table and destroyed.
+ *
+ * It is the caller's responsibility to ensure @op_ctx.curr_page points to a
+ * valid L0 entry.
+ */
+static void
+pvr_page_table_l0_remove(struct pvr_mmu_op_context *op_ctx)
+{
+       struct pvr_page_table_l0_entry_raw *entry_raw =
+               pvr_page_table_l0_get_entry_raw(op_ctx->curr_page.l0_table,
+                                               op_ctx->curr_page.l0_idx);
+
+       pvr_page_table_l0_entry_raw_clear(entry_raw);
+
+       /*
+        * There is no entry to clear here - we don't keep a mirror of
+        * individual pages.
+        */
+
+       if (--op_ctx->curr_page.l0_table->entry_count == 0) {
+               /* Clear the parent L1 page table entry. */
+               if (op_ctx->curr_page.l0_table->parent_idx != PVR_IDX_INVALID)
+                       pvr_page_table_l1_remove(op_ctx);
+       }
+}
+
+/**
+ * DOC: Page table index utilities
+ */
+
+/**
+ * pvr_page_table_l2_idx() - Calculate the level 2 page table index for a
+ *                           device-virtual address.
+ * @device_addr: Target device-virtual address.
+ *
+ * This function does not perform any bounds checking - it is the caller's
+ * responsibility to ensure that @device_addr is valid before interpreting
+ * the result.
+ *
+ * Return:
+ * The index into a level 2 page table corresponding to @device_addr.
+ */
+static u16
+pvr_page_table_l2_idx(u64 device_addr)
+{
+       return (device_addr & ~ROGUE_MMUCTRL_VADDR_PC_INDEX_CLRMSK) >>
+              ROGUE_MMUCTRL_VADDR_PC_INDEX_SHIFT;
+}
+
+/**
+ * pvr_page_table_l1_idx() - Calculate the level 1 page table index for a
+ *                           device-virtual address.
+ * @device_addr: Target device-virtual address.
+ *
+ * This function does not perform any bounds checking - it is the caller's
+ * responsibility to ensure that @device_addr is valid before interpreting
+ * the result.
+ *
+ * Return:
+ * The index into a level 1 page table corresponding to @device_addr.
+ */
+static u16
+pvr_page_table_l1_idx(u64 device_addr)
+{
+       return (device_addr & ~ROGUE_MMUCTRL_VADDR_PD_INDEX_CLRMSK) >>
+              ROGUE_MMUCTRL_VADDR_PD_INDEX_SHIFT;
+}
+
+/**
+ * pvr_page_table_l0_idx() - Calculate the level 0 page table index for a
+ *                           device-virtual address.
+ * @device_addr: Target device-virtual address.
+ *
+ * This function does not perform any bounds checking - it is the caller's
+ * responsibility to ensure that @device_addr is valid before interpreting
+ * the result.
+ *
+ * Return:
+ * The index into a level 0 page table corresponding to @device_addr.
+ */
+static u16
+pvr_page_table_l0_idx(u64 device_addr)
+{
+       return (device_addr & ~ROGUE_MMUCTRL_VADDR_PT_INDEX_CLRMSK) >>
+              ROGUE_MMUCTRL_PAGE_X_RANGE_SHIFT;
+}
+
+/**
+ * DOC: High-level page table operations
+ */
+
+/**
+ * pvr_page_table_l1_get_or_insert() - Retrieves (optionally inserting if
+ * necessary) a level 1 page table from the specified level 2 page table entry.
+ * @op_ctx: Target MMU op context.
+ * @should_insert: [IN] Specifies whether new page tables should be inserted
+ * when empty page table entries are encountered during traversal.
+ *
+ * Return:
+ *  * 0 on success, or
+ *
+ *    If @should_insert is %false:
+ *     * -%ENXIO if a level 1 page table would have been inserted.
+ *
+ *    If @should_insert is %true:
+ *     * Any error encountered while inserting the level 1 page table.
+ */
+static int
+pvr_page_table_l1_get_or_insert(struct pvr_mmu_op_context *op_ctx,
+                               bool should_insert)
+{
+       struct pvr_page_table_l2 *l2_table =
+               &op_ctx->mmu_ctx->page_table_l2;
+       struct pvr_page_table_l1 *table;
+
+       if (pvr_page_table_l2_entry_is_valid(l2_table,
+                                            op_ctx->curr_page.l2_idx)) {
+               op_ctx->curr_page.l1_table =
+                       l2_table->entries[op_ctx->curr_page.l2_idx];
+               return 0;
+       }
+
+       if (!should_insert)
+               return -ENXIO;
+
+       /* Take a prealloced table. */
+       table = op_ctx->map.l1_prealloc_tables;
+       if (!table)
+               return -ENOMEM;
+
+       /* Pop */
+       op_ctx->map.l1_prealloc_tables = table->next_free;
+       table->next_free = NULL;
+
+       /* Ensure new table is fully written out before adding to L2 page table. */
+       wmb();
+
+       pvr_page_table_l2_insert(op_ctx, table);
+
+       return 0;
+}
+
+/**
+ * pvr_page_table_l0_get_or_insert() - Retrieves (optionally inserting if
+ * necessary) a level 0 page table from the specified level 1 page table entry.
+ * @op_ctx: Target MMU op context.
+ * @should_insert: [IN] Specifies whether new page tables should be inserted
+ * when empty page table entries are encountered during traversal.
+ *
+ * Return:
+ *  * 0 on success,
+ *
+ *    If @should_insert is %false:
+ *     * -%ENXIO if a level 0 page table would have been inserted.
+ *
+ *    If @should_insert is %true:
+ *     * Any error encountered while inserting the level 0 page table.
+ */
+static int
+pvr_page_table_l0_get_or_insert(struct pvr_mmu_op_context *op_ctx,
+                               bool should_insert)
+{
+       struct pvr_page_table_l0 *table;
+
+       if (pvr_page_table_l1_entry_is_valid(op_ctx->curr_page.l1_table,
+                                            op_ctx->curr_page.l1_idx)) {
+               op_ctx->curr_page.l0_table =
+                       op_ctx->curr_page.l1_table->entries[op_ctx->curr_page.l1_idx];
+               return 0;
+       }
+
+       if (!should_insert)
+               return -ENXIO;
+
+       /* Take a prealloced table. */
+       table = op_ctx->map.l0_prealloc_tables;
+       if (!table)
+               return -ENOMEM;
+
+       /* Pop */
+       op_ctx->map.l0_prealloc_tables = table->next_free;
+       table->next_free = NULL;
+
+       /* Ensure new table is fully written out before adding to L1 page table. */
+       wmb();
+
+       pvr_page_table_l1_insert(op_ctx, table);
+
+       return 0;
+}
+
+/**
+ * pvr_mmu_context_create() - Create an MMU context.
+ * @pvr_dev: PVR device associated with owning VM context.
+ *
+ * Returns:
+ *  * Newly created MMU context object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l2_init().
+ */
+struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev)
+{
+       struct pvr_mmu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       int err;
+
+       if (!ctx)
+               return ERR_PTR(-ENOMEM);
+
+       err = pvr_page_table_l2_init(&ctx->page_table_l2, pvr_dev);
+       if (err)
+               return ERR_PTR(err);
+
+       ctx->pvr_dev = pvr_dev;
+
+       return ctx;
+}
+
+/**
+ * pvr_mmu_context_destroy() - Destroy an MMU context.
+ * @ctx: Target MMU context.
+ */
+void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx)
+{
+       pvr_page_table_l2_fini(&ctx->page_table_l2);
+       kfree(ctx);
+}
+
+/**
+ * pvr_mmu_get_root_table_dma_addr() - Get the DMA address of the root of the
+ * page table structure behind a VM context.
+ * @ctx: Target MMU context.
+ */
+dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx)
+{
+       return ctx->page_table_l2.backing_page.dma_addr;
+}
+
+/**
+ * pvr_page_table_l1_alloc() - Allocate a l1 page_table object.
+ * @ctx: MMU context of owning VM context.
+ *
+ * Returns:
+ *  * Newly created page table object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l1_init().
+ */
+static struct pvr_page_table_l1 *
+pvr_page_table_l1_alloc(struct pvr_mmu_context *ctx)
+{
+       int err;
+
+       struct pvr_page_table_l1 *table =
+               kzalloc(sizeof(*table), GFP_KERNEL);
+
+       if (!table)
+               return ERR_PTR(-ENOMEM);
+
+       err = pvr_page_table_l1_init(table, ctx->pvr_dev);
+       if (err) {
+               kfree(table);
+               return ERR_PTR(err);
+       }
+
+       return table;
+}
+
+/**
+ * pvr_page_table_l0_alloc() - Allocate a l0 page_table object.
+ * @ctx: MMU context of owning VM context.
+ *
+ * Returns:
+ *  * Newly created page table object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l0_init().
+ */
+static struct pvr_page_table_l0 *
+pvr_page_table_l0_alloc(struct pvr_mmu_context *ctx)
+{
+       int err;
+
+       struct pvr_page_table_l0 *table =
+               kzalloc(sizeof(*table), GFP_KERNEL);
+
+       if (!table)
+               return ERR_PTR(-ENOMEM);
+
+       err = pvr_page_table_l0_init(table, ctx->pvr_dev);
+       if (err) {
+               kfree(table);
+               return ERR_PTR(err);
+       }
+
+       return table;
+}
+
+/**
+ * pvr_mmu_op_context_require_sync() - Mark an MMU op context as requiring a
+ * sync operation for the referenced page tables up to a specified level.
+ * @op_ctx: Target MMU op context.
+ * @level: Maximum page table level for which a sync is required.
+ */
+static void
+pvr_mmu_op_context_require_sync(struct pvr_mmu_op_context *op_ctx,
+                               enum pvr_mmu_sync_level level)
+{
+       if (op_ctx->sync_level_required < level)
+               op_ctx->sync_level_required = level;
+}
+
+/**
+ * pvr_mmu_op_context_sync_manual() - Trigger a sync of some or all of the
+ * page tables referenced by a MMU op context.
+ * @op_ctx: Target MMU op context.
+ * @level: Maximum page table level to sync.
+ *
+ * Do not call this function directly. Instead use
+ * pvr_mmu_op_context_sync_partial() which is checked against the current
+ * value of &op_ctx->sync_level_required as set by
+ * pvr_mmu_op_context_require_sync().
+ */
+static void
+pvr_mmu_op_context_sync_manual(struct pvr_mmu_op_context *op_ctx,
+                              enum pvr_mmu_sync_level level)
+{
+       /*
+        * We sync the page table levels in ascending order (starting from the
+        * leaf node) to ensure consistency.
+        */
+
+       WARN_ON(level < PVR_MMU_SYNC_LEVEL_NONE);
+
+       if (level <= PVR_MMU_SYNC_LEVEL_NONE)
+               return;
+
+       if (op_ctx->curr_page.l0_table)
+               pvr_page_table_l0_sync(op_ctx->curr_page.l0_table);
+
+       if (level < PVR_MMU_SYNC_LEVEL_1)
+               return;
+
+       if (op_ctx->curr_page.l1_table)
+               pvr_page_table_l1_sync(op_ctx->curr_page.l1_table);
+
+       if (level < PVR_MMU_SYNC_LEVEL_2)
+               return;
+
+       pvr_page_table_l2_sync(&op_ctx->mmu_ctx->page_table_l2);
+}
+
+/**
+ * pvr_mmu_op_context_sync_partial() - Trigger a sync of some or all of the
+ * page tables referenced by a MMU op context.
+ * @op_ctx: Target MMU op context.
+ * @level: Requested page table level to sync up to (inclusive).
+ *
+ * If @level is greater than the maximum level recorded by @op_ctx as requiring
+ * a sync operation, only the previously recorded maximum will be used.
+ *
+ * Additionally, if @level is greater than or equal to the maximum level
+ * recorded by @op_ctx as requiring a sync operation, that maximum level will be
+ * reset as a full sync will be performed. This is equivalent to calling
+ * pvr_mmu_op_context_sync().
+ */
+static void
+pvr_mmu_op_context_sync_partial(struct pvr_mmu_op_context *op_ctx,
+                               enum pvr_mmu_sync_level level)
+{
+       /*
+        * If the requested sync level is greater than or equal to the
+        * currently required sync level, we do two things:
+        *  * Don't waste time syncing levels we haven't previously marked as
+        *    requiring a sync, and
+        *  * Reset the required sync level since we are about to sync
+        *    everything that was previously marked as requiring a sync.
+        */
+       if (level >= op_ctx->sync_level_required) {
+               level = op_ctx->sync_level_required;
+               op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+       }
+
+       pvr_mmu_op_context_sync_manual(op_ctx, level);
+}
+
+/**
+ * pvr_mmu_op_context_sync() - Trigger a sync of every page table referenced by
+ * a MMU op context.
+ * @op_ctx: Target MMU op context.
+ *
+ * The maximum level marked internally as requiring a sync will be reset so
+ * that subsequent calls to this function will be no-ops unless @op_ctx is
+ * otherwise updated.
+ */
+static void
+pvr_mmu_op_context_sync(struct pvr_mmu_op_context *op_ctx)
+{
+       pvr_mmu_op_context_sync_manual(op_ctx, op_ctx->sync_level_required);
+
+       op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+}
+
+/**
+ * pvr_mmu_op_context_load_tables() - Load pointers to tables in each level of
+ * the page table tree structure needed to reference the physical page
+ * referenced by a MMU op context.
+ * @op_ctx: Target MMU op context.
+ * @should_create: Specifies whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ * @load_level_required: Maximum page table level to load.
+ *
+ * If @should_create is %true, this function may modify the stored required
+ * sync level of @op_ctx as new page tables are created and inserted into their
+ * respective parents.
+ *
+ * Since there is only one root page table, it is technically incorrect to call
+ * this function with a value of @load_level_required greater than or equal to
+ * the root level number. However, this is not explicitly disallowed here.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * Any error returned by pvr_page_table_l1_get_or_create() if
+ *    @load_level_required >= 1 except -%ENXIO, or
+ *  * Any error returned by pvr_page_table_l0_get_or_create() if
+ *    @load_level_required >= 0 except -%ENXIO.
+ */
+static int
+pvr_mmu_op_context_load_tables(struct pvr_mmu_op_context *op_ctx,
+                              bool should_create,
+                              enum pvr_mmu_sync_level load_level_required)
+{
+       const struct pvr_page_table_l1 *l1_head_before =
+               op_ctx->map.l1_prealloc_tables;
+       const struct pvr_page_table_l0 *l0_head_before =
+               op_ctx->map.l0_prealloc_tables;
+       int err;
+
+       /* Clear tables we're about to fetch in case of error states. */
+       if (load_level_required >= PVR_MMU_SYNC_LEVEL_1)
+               op_ctx->curr_page.l1_table = NULL;
+
+       if (load_level_required >= PVR_MMU_SYNC_LEVEL_0)
+               op_ctx->curr_page.l0_table = NULL;
+
+       /* Get or create L1 page table. */
+       if (load_level_required >= PVR_MMU_SYNC_LEVEL_1) {
+               err = pvr_page_table_l1_get_or_insert(op_ctx, should_create);
+               if (err) {
+                       /*
+                        * If @should_create is %false and no L1 page table was
+                        * found, return early but without an error. Since
+                        * pvr_page_table_l1_get_or_create() can only return
+                        * -%ENXIO if @should_create is %false, there is no
+                        * need to check it here.
+                        */
+                       if (err == -ENXIO)
+                               err = 0;
+
+                       return err;
+               }
+       }
+
+       /* Get or create L0 page table. */
+       if (load_level_required >= PVR_MMU_SYNC_LEVEL_0) {
+               err = pvr_page_table_l0_get_or_insert(op_ctx, should_create);
+               if (err) {
+                       /*
+                        * If @should_create is %false and no L0 page table was
+                        * found, return early but without an error. Since
+                        * pvr_page_table_l0_get_or_insert() can only return
+                        * -%ENXIO if @should_create is %false, there is no
+                        * need to check it here.
+                        */
+                       if (err == -ENXIO)
+                               err = 0;
+
+                       /*
+                        * At this point, an L1 page table could have been
+                        * inserted but is now empty due to the failed attempt
+                        * at inserting an L0 page table. In this instance, we
+                        * must remove the empty L1 page table ourselves as
+                        * pvr_page_table_l1_remove() is never called as part
+                        * of the error path in
+                        * pvr_page_table_l0_get_or_insert().
+                        */
+                       if (l1_head_before != op_ctx->map.l1_prealloc_tables) {
+                               pvr_page_table_l2_remove(op_ctx);
+                               pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
+                       }
+
+                       return err;
+               }
+       }
+
+       /*
+        * A sync is only needed if table objects were inserted. This can be
+        * inferred by checking if the pointer at the head of the linked list
+        * has changed.
+        */
+       if (l1_head_before != op_ctx->map.l1_prealloc_tables)
+               pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_2);
+       else if (l0_head_before != op_ctx->map.l0_prealloc_tables)
+               pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_1);
+
+       return 0;
+}
+
+/**
+ * pvr_mmu_op_context_set_curr_page() - Reassign the current page of an MMU op
+ * context, syncing any page tables previously assigned to it which are no
+ * longer relevant.
+ * @op_ctx: Target MMU op context.
+ * @device_addr: New pointer target.
+ * @should_create: Specify whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ *
+ * This function performs a full sync on the pointer, regardless of which
+ * levels are modified.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_op_context_load_tables().
+ */
+static int
+pvr_mmu_op_context_set_curr_page(struct pvr_mmu_op_context *op_ctx,
+                                u64 device_addr, bool should_create)
+{
+       pvr_mmu_op_context_sync(op_ctx);
+
+       op_ctx->curr_page.l2_idx = pvr_page_table_l2_idx(device_addr);
+       op_ctx->curr_page.l1_idx = pvr_page_table_l1_idx(device_addr);
+       op_ctx->curr_page.l0_idx = pvr_page_table_l0_idx(device_addr);
+       op_ctx->curr_page.l1_table = NULL;
+       op_ctx->curr_page.l0_table = NULL;
+
+       return pvr_mmu_op_context_load_tables(op_ctx, should_create,
+                                             PVR_MMU_SYNC_LEVEL_1);
+}
+
+/**
+ * pvr_mmu_op_context_next_page() - Advance the current page of an MMU op
+ * context.
+ * @op_ctx: Target MMU op context.
+ * @should_create: Specify whether new page tables should be created when
+ * empty page table entries are encountered during traversal.
+ *
+ * If @should_create is %false, it is the caller's responsibility to verify that
+ * the state of the table references in @op_ctx is valid on return. If -%ENXIO
+ * is returned, at least one of the table references is invalid. It should be
+ * noted that @op_ctx as a whole will be left in a valid state if -%ENXIO is
+ * returned, unlike other error codes. The caller should check which references
+ * are invalid by comparing them to %NULL. Only &@ptr->l2_table is guaranteed
+ * to be valid, since it represents the root of the page table tree structure.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EPERM if the operation would wrap at the top of the page table
+ *    hierarchy,
+ *  * -%ENXIO if @should_create is %false and a page table of any level would
+ *    have otherwise been created, or
+ *  * Any error returned while attempting to create missing page tables if
+ *    @should_create is %true.
+ */
+static int
+pvr_mmu_op_context_next_page(struct pvr_mmu_op_context *op_ctx,
+                            bool should_create)
+{
+       s8 load_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+
+       if (++op_ctx->curr_page.l0_idx != ROGUE_MMUCTRL_ENTRIES_PT_VALUE_X)
+               goto load_tables;
+
+       op_ctx->curr_page.l0_idx = 0;
+       load_level_required = PVR_MMU_SYNC_LEVEL_0;
+
+       if (++op_ctx->curr_page.l1_idx != ROGUE_MMUCTRL_ENTRIES_PD_VALUE)
+               goto load_tables;
+
+       op_ctx->curr_page.l1_idx = 0;
+       load_level_required = PVR_MMU_SYNC_LEVEL_1;
+
+       if (++op_ctx->curr_page.l2_idx != ROGUE_MMUCTRL_ENTRIES_PC_VALUE)
+               goto load_tables;
+
+       /*
+        * If the pattern continued, we would set &op_ctx->curr_page.l2_idx to
+        * zero here. However, that would wrap the top layer of the page table
+        * hierarchy which is not a valid operation. Instead, we warn and return
+        * an error.
+        */
+       WARN(true,
+            "%s(%p) attempted to loop the top of the page table hierarchy",
+            __func__, op_ctx);
+       return -EPERM;
+
+       /* If indices have wrapped, we need to load new tables. */
+load_tables:
+       /* First, flush tables which will be unloaded. */
+       pvr_mmu_op_context_sync_partial(op_ctx, load_level_required);
+
+       /* Then load tables from the required level down. */
+       return pvr_mmu_op_context_load_tables(op_ctx, should_create,
+                                             load_level_required);
+}
+
+/**
+ * DOC: Single page operations
+ */
+
+/**
+ * pvr_page_create() - Create a device-virtual memory page and insert it into
+ * a level 0 page table.
+ * @op_ctx: Target MMU op context pointing at the device-virtual address of the
+ * target page.
+ * @dma_addr: DMA address of the physical page backing the created page.
+ * @flags: Page options saved on the level 0 page table entry for reading by
+ *         the device.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * -%EEXIST if the requested page already exists.
+ */
+static int
+pvr_page_create(struct pvr_mmu_op_context *op_ctx, dma_addr_t dma_addr,
+               struct pvr_page_flags_raw flags)
+{
+       /* Do not create a new page if one already exists. */
+       if (pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
+                                            op_ctx->curr_page.l0_idx)) {
+               return -EEXIST;
+       }
+
+       pvr_page_table_l0_insert(op_ctx, dma_addr, flags);
+
+       pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
+
+       return 0;
+}
+
+/**
+ * pvr_page_destroy() - Destroy a device page after removing it from its
+ * parent level 0 page table.
+ * @op_ctx: Target MMU op context.
+ */
+static void
+pvr_page_destroy(struct pvr_mmu_op_context *op_ctx)
+{
+       /* Do nothing if the page does not exist. */
+       if (!pvr_page_table_l0_entry_is_valid(op_ctx->curr_page.l0_table,
+                                             op_ctx->curr_page.l0_idx)) {
+               return;
+       }
+
+       /* Clear the parent L0 page table entry. */
+       pvr_page_table_l0_remove(op_ctx);
+
+       pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
+}
+
+/**
+ * pvr_mmu_op_context_destroy() - Destroy an MMU op context.
+ * @op_ctx: Target MMU op context.
+ */
+void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx)
+{
+       const bool flush_caches =
+               op_ctx->sync_level_required != PVR_MMU_SYNC_LEVEL_NONE;
+
+       pvr_mmu_op_context_sync(op_ctx);
+
+       /* Unmaps should be flushed immediately. Map flushes can be deferred. */
+       if (flush_caches && !op_ctx->map.sgt)
+               pvr_mmu_flush_exec(op_ctx->mmu_ctx->pvr_dev, true);
+
+       while (op_ctx->map.l0_prealloc_tables) {
+               struct pvr_page_table_l0 *tmp = op_ctx->map.l0_prealloc_tables;
+
+               op_ctx->map.l0_prealloc_tables =
+                       op_ctx->map.l0_prealloc_tables->next_free;
+               pvr_page_table_l0_free(tmp);
+       }
+
+       while (op_ctx->map.l1_prealloc_tables) {
+               struct pvr_page_table_l1 *tmp = op_ctx->map.l1_prealloc_tables;
+
+               op_ctx->map.l1_prealloc_tables =
+                       op_ctx->map.l1_prealloc_tables->next_free;
+               pvr_page_table_l1_free(tmp);
+       }
+
+       while (op_ctx->unmap.l0_free_tables) {
+               struct pvr_page_table_l0 *tmp = op_ctx->unmap.l0_free_tables;
+
+               op_ctx->unmap.l0_free_tables =
+                       op_ctx->unmap.l0_free_tables->next_free;
+               pvr_page_table_l0_free(tmp);
+       }
+
+       while (op_ctx->unmap.l1_free_tables) {
+               struct pvr_page_table_l1 *tmp = op_ctx->unmap.l1_free_tables;
+
+               op_ctx->unmap.l1_free_tables =
+                       op_ctx->unmap.l1_free_tables->next_free;
+               pvr_page_table_l1_free(tmp);
+       }
+
+       kfree(op_ctx);
+}
+
+/**
+ * pvr_mmu_op_context_create() - Create an MMU op context.
+ * @ctx: MMU context associated with owning VM context.
+ * @sgt: Scatter gather table containing pages pinned for use by this context.
+ * @sgt_offset: Start offset of the requested device-virtual memory mapping.
+ * @size: Size in bytes of the requested device-virtual memory mapping. For an
+ * unmapping, this should be zero so that no page tables are allocated.
+ *
+ * Returns:
+ *  * Newly created MMU op context object on success, or
+ *  * -%ENOMEM if no memory is available,
+ *  * Any error code returned by pvr_page_table_l2_init().
+ */
+struct pvr_mmu_op_context *
+pvr_mmu_op_context_create(struct pvr_mmu_context *ctx, struct sg_table *sgt,
+                         u64 sgt_offset, u64 size)
+{
+       int err;
+
+       struct pvr_mmu_op_context *op_ctx =
+               kzalloc(sizeof(*op_ctx), GFP_KERNEL);
+
+       if (!op_ctx)
+               return ERR_PTR(-ENOMEM);
+
+       op_ctx->mmu_ctx = ctx;
+       op_ctx->map.sgt = sgt;
+       op_ctx->map.sgt_offset = sgt_offset;
+       op_ctx->sync_level_required = PVR_MMU_SYNC_LEVEL_NONE;
+
+       if (size) {
+               /*
+                * The number of page table objects we need to prealloc is
+                * indicated by the mapping size, start offset and the sizes
+                * of the areas mapped per PT or PD. The range calculation is
+                * identical to that for the index into a table for a device
+                * address, so we reuse those functions here.
+                */
+               const u32 l1_start_idx = pvr_page_table_l2_idx(sgt_offset);
+               const u32 l1_end_idx = pvr_page_table_l2_idx(sgt_offset + size);
+               const u32 l1_count = l1_end_idx - l1_start_idx + 1;
+               const u32 l0_start_idx = pvr_page_table_l1_idx(sgt_offset);
+               const u32 l0_end_idx = pvr_page_table_l1_idx(sgt_offset + size);
+               const u32 l0_count = l0_end_idx - l0_start_idx + 1;
+
+               /*
+                * Alloc and push page table entries until we have enough of
+                * each type, ending with linked lists of l0 and l1 entries in
+                * reverse order.
+                */
+               for (int i = 0; i < l1_count; i++) {
+                       struct pvr_page_table_l1 *l1_tmp =
+                               pvr_page_table_l1_alloc(ctx);
+
+                       err = PTR_ERR_OR_ZERO(l1_tmp);
+                       if (err)
+                               goto err_cleanup;
+
+                       l1_tmp->next_free = op_ctx->map.l1_prealloc_tables;
+                       op_ctx->map.l1_prealloc_tables = l1_tmp;
+               }
+
+               for (int i = 0; i < l0_count; i++) {
+                       struct pvr_page_table_l0 *l0_tmp =
+                               pvr_page_table_l0_alloc(ctx);
+
+                       err = PTR_ERR_OR_ZERO(l0_tmp);
+                       if (err)
+                               goto err_cleanup;
+
+                       l0_tmp->next_free = op_ctx->map.l0_prealloc_tables;
+                       op_ctx->map.l0_prealloc_tables = l0_tmp;
+               }
+       }
+
+       return op_ctx;
+
+err_cleanup:
+       pvr_mmu_op_context_destroy(op_ctx);
+
+       return ERR_PTR(err);
+}
+
+/**
+ * pvr_mmu_op_context_unmap_curr_page() - Unmap pages from a memory context
+ * starting from the current page of an MMU op context.
+ * @op_ctx: Target MMU op context pointing at the first page to unmap.
+ * @nr_pages: Number of pages to unmap.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error encountered while advancing @op_ctx.curr_page with
+ *    pvr_mmu_op_context_next_page() (except -%ENXIO).
+ */
+static int
+pvr_mmu_op_context_unmap_curr_page(struct pvr_mmu_op_context *op_ctx,
+                                  u64 nr_pages)
+{
+       int err;
+
+       if (nr_pages == 0)
+               return 0;
+
+       /*
+        * Destroy first page outside loop, as it doesn't require a page
+        * advance beforehand. If the L0 page table reference in
+        * @op_ctx.curr_page is %NULL, there cannot be a mapped page at
+        * @op_ctx.curr_page (so skip ahead).
+        */
+       if (op_ctx->curr_page.l0_table)
+               pvr_page_destroy(op_ctx);
+
+       for (u64 page = 1; page < nr_pages; ++page) {
+               err = pvr_mmu_op_context_next_page(op_ctx, false);
+               /*
+                * If the page table tree structure at @op_ctx.curr_page is
+                * incomplete, skip ahead. We don't care about unmapping pages
+                * that cannot exist.
+                *
+                * FIXME: This could be made more efficient by jumping ahead
+                * using pvr_mmu_op_context_set_curr_page().
+                */
+               if (err == -ENXIO)
+                       continue;
+               else if (err)
+                       return err;
+
+               pvr_page_destroy(op_ctx);
+       }
+
+       return 0;
+}
+
+/**
+ * pvr_mmu_unmap() - Unmap pages from a memory context.
+ * @op_ctx: Target MMU op context.
+ * @device_addr: First device-virtual address to unmap.
+ * @size: Size in bytes to unmap.
+ *
+ * The total amount of device-virtual memory unmapped is
+ * @nr_pages * %PVR_DEVICE_PAGE_SIZE.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_page_table_ptr_init(), or
+ *  * Any error code returned by pvr_page_table_ptr_unmap().
+ */
+int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size)
+{
+       int err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, false);
+
+       if (err)
+               return err;
+
+       return pvr_mmu_op_context_unmap_curr_page(op_ctx,
+                                                 size >> PVR_DEVICE_PAGE_SHIFT);
+}
+
+/**
+ * pvr_mmu_map_sgl() - Map part of a scatter-gather table entry to
+ * device-virtual memory.
+ * @op_ctx: Target MMU op context pointing to the first page that should be
+ * mapped.
+ * @sgl: Target scatter-gather table entry.
+ * @offset: Offset into @sgl to map from. Must result in a starting address
+ * from @sgl which is CPU page-aligned.
+ * @size: Size of the memory to be mapped in bytes. Must be a non-zero multiple
+ * of the device page size.
+ * @page_flags: Page options to be applied to every device-virtual memory page
+ * in the created mapping.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if the range specified by @offset and @size is not completely
+ *    within @sgl, or
+ *  * Any error encountered while creating a page with pvr_page_create(), or
+ *  * Any error encountered while advancing @op_ctx.curr_page with
+ *    pvr_mmu_op_context_next_page().
+ */
+static int
+pvr_mmu_map_sgl(struct pvr_mmu_op_context *op_ctx, struct scatterlist *sgl,
+               u64 offset, u64 size, struct pvr_page_flags_raw page_flags)
+{
+       const unsigned int pages = size >> PVR_DEVICE_PAGE_SHIFT;
+       dma_addr_t dma_addr = sg_dma_address(sgl) + offset;
+       const unsigned int dma_len = sg_dma_len(sgl);
+       struct pvr_page_table_ptr ptr_copy;
+       unsigned int page;
+       int err;
+
+       if (size > dma_len || offset > dma_len - size)
+               return -EINVAL;
+
+       /*
+        * Before progressing, save a copy of the start pointer so we can use
+        * it again if we enter an error state and have to destroy pages.
+        */
+       memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
+
+       /*
+        * Create first page outside loop, as it doesn't require a page advance
+        * beforehand.
+        */
+       err = pvr_page_create(op_ctx, dma_addr, page_flags);
+       if (err)
+               return err;
+
+       for (page = 1; page < pages; ++page) {
+               err = pvr_mmu_op_context_next_page(op_ctx, true);
+               if (err)
+                       goto err_destroy_pages;
+
+               dma_addr += PVR_DEVICE_PAGE_SIZE;
+
+               err = pvr_page_create(op_ctx, dma_addr, page_flags);
+               if (err)
+                       goto err_destroy_pages;
+       }
+
+       return 0;
+
+err_destroy_pages:
+       memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
+       err = pvr_mmu_op_context_unmap_curr_page(op_ctx, page);
+
+       return err;
+}
+
+/**
+ * pvr_mmu_map() - Map an object's virtual memory to physical memory.
+ * @op_ctx: Target MMU op context.
+ * @size: Size of memory to be mapped in bytes. Must be a non-zero multiple
+ * of the device page size.
+ * @flags: Flags from pvr_gem_object associated with the mapping.
+ * @device_addr: Virtual device address to map to. Must be device page-aligned.
+ *
+ * Returns:
+ *  * 0 on success, or
+ *  * Any error code returned by pvr_page_table_ptr_init(), or
+ *  * Any error code returned by pvr_mmu_map_sgl(), or
+ *  * Any error code returned by pvr_page_table_ptr_next_page().
+ */
+int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
+               u64 device_addr)
+{
+       struct pvr_page_table_ptr ptr_copy;
+       struct pvr_page_flags_raw flags_raw;
+       struct scatterlist *sgl;
+       u64 mapped_size = 0;
+       unsigned int count;
+       int err;
+
+       if (!size)
+               return 0;
+
+       if ((op_ctx->map.sgt_offset | size) & ~PVR_DEVICE_PAGE_MASK)
+               return -EINVAL;
+
+       err = pvr_mmu_op_context_set_curr_page(op_ctx, device_addr, true);
+       if (err)
+               return -EINVAL;
+
+       memcpy(&ptr_copy, &op_ctx->curr_page, sizeof(ptr_copy));
+
+       flags_raw = pvr_page_flags_raw_create(false, false,
+                                             flags & DRM_PVR_BO_BYPASS_DEVICE_CACHE,
+                                             flags & DRM_PVR_BO_PM_FW_PROTECT);
+
+       /* Map scatter gather table */
+       for_each_sgtable_dma_sg(op_ctx->map.sgt, sgl, count) {
+               const size_t sgl_len = sg_dma_len(sgl);
+               u64 sgl_offset, map_sgl_len;
+
+               if (sgl_len <= op_ctx->map.sgt_offset) {
+                       op_ctx->map.sgt_offset -= sgl_len;
+                       continue;
+               }
+
+               sgl_offset = op_ctx->map.sgt_offset;
+               map_sgl_len = min_t(u64, sgl_len - sgl_offset, size - mapped_size);
+
+               err = pvr_mmu_map_sgl(op_ctx, sgl, sgl_offset, map_sgl_len,
+                                     flags_raw);
+               if (err)
+                       break;
+
+               /*
+                * Flag the L0 page table as requiring a flush when the MMU op
+                * context is destroyed.
+                */
+               pvr_mmu_op_context_require_sync(op_ctx, PVR_MMU_SYNC_LEVEL_0);
+
+               op_ctx->map.sgt_offset = 0;
+               mapped_size += map_sgl_len;
+
+               if (mapped_size >= size)
+                       break;
+
+               err = pvr_mmu_op_context_next_page(op_ctx, true);
+               if (err)
+                       break;
+       }
+
+       if (err && mapped_size) {
+               memcpy(&op_ctx->curr_page, &ptr_copy, sizeof(op_ctx->curr_page));
+               pvr_mmu_op_context_unmap_curr_page(op_ctx,
+                                                  mapped_size >> PVR_DEVICE_PAGE_SHIFT);
+       }
+
+       return err;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_mmu.h b/drivers/gpu/drm/imagination/pvr_mmu.h
new file mode 100644 (file)
index 0000000..a8ecd46
--- /dev/null
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_MMU_H
+#define PVR_MMU_H
+
+#include <linux/memory.h>
+#include <linux/types.h>
+
+/* Forward declaration from "pvr_device.h" */
+struct pvr_device;
+
+/* Forward declaration from "pvr_mmu.c" */
+struct pvr_mmu_context;
+struct pvr_mmu_op_context;
+
+/* Forward declaration from "pvr_vm.c" */
+struct pvr_vm_context;
+
+/* Forward declaration from <linux/scatterlist.h> */
+struct sg_table;
+
+/**
+ * DOC: Public API (constants)
+ *
+ * .. c:macro:: PVR_DEVICE_PAGE_SIZE
+ *
+ *    Fixed page size referenced by leaf nodes in the page table tree
+ *    structure. In the current implementation, this value is pegged to the
+ *    CPU page size (%PAGE_SIZE). It is therefore an error to specify a CPU
+ *    page size which is not also a supported device page size. The supported
+ *    device page sizes are: 4KiB, 16KiB, 64KiB, 256KiB, 1MiB and 2MiB.
+ *
+ * .. c:macro:: PVR_DEVICE_PAGE_SHIFT
+ *
+ *    Shift value used to efficiently multiply or divide by
+ *    %PVR_DEVICE_PAGE_SIZE.
+ *
+ *    This value is derived from %PVR_DEVICE_PAGE_SIZE.
+ *
+ * .. c:macro:: PVR_DEVICE_PAGE_MASK
+ *
+ *    Mask used to round a value down to the nearest multiple of
+ *    %PVR_DEVICE_PAGE_SIZE. When bitwise negated, it will indicate whether a
+ *    value is already a multiple of %PVR_DEVICE_PAGE_SIZE.
+ *
+ *    This value is derived from %PVR_DEVICE_PAGE_SIZE.
+ */
+
+/* PVR_DEVICE_PAGE_SIZE determines the page size */
+#define PVR_DEVICE_PAGE_SIZE (PAGE_SIZE)
+#define PVR_DEVICE_PAGE_SHIFT (PAGE_SHIFT)
+#define PVR_DEVICE_PAGE_MASK (PAGE_MASK)
+
+/**
+ * DOC: Page table index utilities (constants)
+ *
+ * .. c:macro:: PVR_PAGE_TABLE_ADDR_SPACE_SIZE
+ *
+ *    Size of device-virtual address space which can be represented in the page
+ *    table structure.
+ *
+ *    This value is checked at runtime against
+ *    &pvr_device_features.virtual_address_space_bits by
+ *    pvr_vm_create_context(), which will return an error if the feature value
+ *    does not match this constant.
+ *
+ *    .. admonition:: Future work
+ *
+ *       It should be possible to support other values of
+ *       &pvr_device_features.virtual_address_space_bits, but so far no
+ *       hardware has been created which advertises an unsupported value.
+ *
+ * .. c:macro:: PVR_PAGE_TABLE_ADDR_BITS
+ *
+ *    Number of bits needed to represent any value less than
+ *    %PVR_PAGE_TABLE_ADDR_SPACE_SIZE exactly.
+ *
+ * .. c:macro:: PVR_PAGE_TABLE_ADDR_MASK
+ *
+ *    Bitmask of device-virtual addresses which are valid in the page table
+ *    structure.
+ *
+ *    This value is derived from %PVR_PAGE_TABLE_ADDR_SPACE_SIZE, so the same
+ *    notes on that constant apply here.
+ */
+#define PVR_PAGE_TABLE_ADDR_SPACE_SIZE SZ_1T
+#define PVR_PAGE_TABLE_ADDR_BITS __ffs(PVR_PAGE_TABLE_ADDR_SPACE_SIZE)
+#define PVR_PAGE_TABLE_ADDR_MASK (PVR_PAGE_TABLE_ADDR_SPACE_SIZE - 1)
+
+void pvr_mmu_flush_request_all(struct pvr_device *pvr_dev);
+int pvr_mmu_flush_exec(struct pvr_device *pvr_dev, bool wait);
+
+struct pvr_mmu_context *pvr_mmu_context_create(struct pvr_device *pvr_dev);
+void pvr_mmu_context_destroy(struct pvr_mmu_context *ctx);
+
+dma_addr_t pvr_mmu_get_root_table_dma_addr(struct pvr_mmu_context *ctx);
+
+void pvr_mmu_op_context_destroy(struct pvr_mmu_op_context *op_ctx);
+struct pvr_mmu_op_context *
+pvr_mmu_op_context_create(struct pvr_mmu_context *ctx,
+                         struct sg_table *sgt, u64 sgt_offset, u64 size);
+
+int pvr_mmu_map(struct pvr_mmu_op_context *op_ctx, u64 size, u64 flags,
+               u64 device_addr);
+int pvr_mmu_unmap(struct pvr_mmu_op_context *op_ctx, u64 device_addr, u64 size);
+
+#endif /* PVR_MMU_H */
diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c
new file mode 100644 (file)
index 0000000..4c48a20
--- /dev/null
@@ -0,0 +1,1091 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#include "pvr_vm.h"
+
+#include "pvr_device.h"
+#include "pvr_drv.h"
+#include "pvr_gem.h"
+#include "pvr_mmu.h"
+#include "pvr_rogue_fwif.h"
+#include "pvr_rogue_heap_config.h"
+
+#include <drm/drm_exec.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gpuvm.h>
+
+#include <linux/container_of.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/gfp_types.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/stddef.h>
+
+/**
+ * DOC: Memory context
+ *
+ * This is the "top level" datatype in the VM code. It's exposed in the public
+ * API as an opaque handle.
+ */
+
+/**
+ * struct pvr_vm_context - Context type used to represent a single VM.
+ */
+struct pvr_vm_context {
+       /**
+        * @pvr_dev: The PowerVR device to which this context is bound.
+        * This binding is immutable for the life of the context.
+        */
+       struct pvr_device *pvr_dev;
+
+       /** @mmu_ctx: The context for binding to physical memory. */
+       struct pvr_mmu_context *mmu_ctx;
+
+       /** @gpuva_mgr: GPUVA manager object associated with this context. */
+       struct drm_gpuvm gpuvm_mgr;
+
+       /** @lock: Global lock on this VM. */
+       struct mutex lock;
+
+       /**
+        * @fw_mem_ctx_obj: Firmware object representing firmware memory
+        * context.
+        */
+       struct pvr_fw_object *fw_mem_ctx_obj;
+
+       /** @ref_count: Reference count of object. */
+       struct kref ref_count;
+
+       /**
+        * @dummy_gem: GEM object to enable VM reservation. All private BOs
+        * should use the @dummy_gem.resv and not their own _resv field.
+        */
+       struct drm_gem_object dummy_gem;
+};
+
+struct pvr_vm_context *pvr_vm_context_get(struct pvr_vm_context *vm_ctx)
+{
+       if (vm_ctx)
+               kref_get(&vm_ctx->ref_count);
+
+       return vm_ctx;
+}
+
+/**
+ * pvr_vm_get_page_table_root_addr() - Get the DMA address of the root of the
+ *                                     page table structure behind a VM context.
+ * @vm_ctx: Target VM context.
+ */
+dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx)
+{
+       return pvr_mmu_get_root_table_dma_addr(vm_ctx->mmu_ctx);
+}
+
+/**
+ * pvr_vm_get_dma_resv() - Expose the dma_resv owned by the VM context.
+ * @vm_ctx: Target VM context.
+ *
+ * This is used to allow private BOs to share a dma_resv for faster fence
+ * updates.
+ *
+ * Returns: The dma_resv pointer.
+ */
+struct dma_resv *pvr_vm_get_dma_resv(struct pvr_vm_context *vm_ctx)
+{
+       return vm_ctx->dummy_gem.resv;
+}
+
+/**
+ * DOC: Memory mappings
+ */
+
+/**
+ * struct pvr_vm_gpuva - Wrapper type representing a single VM mapping.
+ */
+struct pvr_vm_gpuva {
+       /** @base: The wrapped drm_gpuva object. */
+       struct drm_gpuva base;
+};
+
+static __always_inline
+struct pvr_vm_gpuva *to_pvr_vm_gpuva(struct drm_gpuva *gpuva)
+{
+       return container_of(gpuva, struct pvr_vm_gpuva, base);
+}
+
+enum pvr_vm_bind_type {
+       PVR_VM_BIND_TYPE_MAP,
+       PVR_VM_BIND_TYPE_UNMAP,
+};
+
+/**
+ * struct pvr_vm_bind_op - Context of a map/unmap operation.
+ */
+struct pvr_vm_bind_op {
+       /** @type: Map or unmap. */
+       enum pvr_vm_bind_type type;
+
+       /** @pvr_obj: Object associated with mapping (map only). */
+       struct pvr_gem_object *pvr_obj;
+
+       /**
+        * @vm_ctx: VM context where the mapping will be created or destroyed.
+        */
+       struct pvr_vm_context *vm_ctx;
+
+       /** @mmu_op_ctx: MMU op context. */
+       struct pvr_mmu_op_context *mmu_op_ctx;
+
+       /** @gpuvm_bo: Prealloced wrapped BO for attaching to the gpuvm. */
+       struct drm_gpuvm_bo *gpuvm_bo;
+
+       /**
+        * @new_va: Prealloced VA mapping object (init in callback).
+        * Used when creating a mapping.
+        */
+       struct pvr_vm_gpuva *new_va;
+
+       /**
+        * @prev_va: Prealloced VA mapping object (init in callback).
+        * Used when a mapping or unmapping operation overlaps an existing
+        * mapping and splits away the beginning into a new mapping.
+        */
+       struct pvr_vm_gpuva *prev_va;
+
+       /**
+        * @next_va: Prealloced VA mapping object (init in callback).
+        * Used when a mapping or unmapping operation overlaps an existing
+        * mapping and splits away the end into a new mapping.
+        */
+       struct pvr_vm_gpuva *next_va;
+
+       /** @offset: Offset into @pvr_obj to begin mapping from. */
+       u64 offset;
+
+       /** @device_addr: Device-virtual address at the start of the mapping. */
+       u64 device_addr;
+
+       /** @size: Size of the desired mapping. */
+       u64 size;
+};
+
+/**
+ * pvr_vm_bind_op_exec() - Execute a single bind op.
+ * @bind_op: Bind op context.
+ *
+ * Returns:
+ *  * 0 on success,
+ *  * Any error code returned by drm_gpuva_sm_map(), drm_gpuva_sm_unmap(), or
+ *    a callback function.
+ */
+static int pvr_vm_bind_op_exec(struct pvr_vm_bind_op *bind_op)
+{
+       switch (bind_op->type) {
+       case PVR_VM_BIND_TYPE_MAP:
+               return drm_gpuvm_sm_map(&bind_op->vm_ctx->gpuvm_mgr,
+                                       bind_op, bind_op->device_addr,
+                                       bind_op->size,
+                                       gem_from_pvr_gem(bind_op->pvr_obj),
+                                       bind_op->offset);
+
+       case PVR_VM_BIND_TYPE_UNMAP:
+               return drm_gpuvm_sm_unmap(&bind_op->vm_ctx->gpuvm_mgr,
+                                         bind_op, bind_op->device_addr,
+                                         bind_op->size);
+       }
+
+       /*
+        * This shouldn't happen unless something went wrong
+        * in drm_sched.
+        */
+       WARN_ON(1);
+       return -EINVAL;
+}
+
+static void pvr_vm_bind_op_fini(struct pvr_vm_bind_op *bind_op)
+{
+       drm_gpuvm_bo_put(bind_op->gpuvm_bo);
+
+       kfree(bind_op->new_va);
+       kfree(bind_op->prev_va);
+       kfree(bind_op->next_va);
+
+       if (bind_op->pvr_obj)
+               pvr_gem_object_put(bind_op->pvr_obj);
+
+       if (bind_op->mmu_op_ctx)
+               pvr_mmu_op_context_destroy(bind_op->mmu_op_ctx);
+}
+
+static int
+pvr_vm_bind_op_map_init(struct pvr_vm_bind_op *bind_op,
+                       struct pvr_vm_context *vm_ctx,
+                       struct pvr_gem_object *pvr_obj, u64 offset,
+                       u64 device_addr, u64 size)
+{
+       const bool is_user = vm_ctx == vm_ctx->pvr_dev->kernel_vm_ctx;
+       const u64 pvr_obj_size = pvr_gem_object_size(pvr_obj);
+       struct sg_table *sgt;
+       u64 offset_plus_size;
+       int err;
+
+       if (check_add_overflow(offset, size, &offset_plus_size))
+               return -EINVAL;
+
+       if (is_user &&
+           !pvr_find_heap_containing(vm_ctx->pvr_dev, device_addr, size)) {
+               return -EINVAL;
+       }
+
+       if (!pvr_device_addr_and_size_are_valid(device_addr, size) ||
+           offset & ~PAGE_MASK || size & ~PAGE_MASK ||
+           offset >= pvr_obj_size || offset_plus_size > pvr_obj_size)
+               return -EINVAL;
+
+       bind_op->type = PVR_VM_BIND_TYPE_MAP;
+
+       bind_op->gpuvm_bo = drm_gpuvm_bo_create(&vm_ctx->gpuvm_mgr,
+                                               gem_from_pvr_gem(pvr_obj));
+       if (!bind_op->gpuvm_bo)
+               return -ENOMEM;
+
+       bind_op->new_va = kzalloc(sizeof(*bind_op->new_va), GFP_KERNEL);
+       bind_op->prev_va = kzalloc(sizeof(*bind_op->prev_va), GFP_KERNEL);
+       bind_op->next_va = kzalloc(sizeof(*bind_op->next_va), GFP_KERNEL);
+       if (!bind_op->new_va || !bind_op->prev_va || !bind_op->next_va) {
+               err = -ENOMEM;
+               goto err_bind_op_fini;
+       }
+
+       /* Pin pages so they're ready for use. */
+       sgt = pvr_gem_object_get_pages_sgt(pvr_obj);
+       err = PTR_ERR_OR_ZERO(sgt);
+       if (err)
+               goto err_bind_op_fini;
+
+       bind_op->mmu_op_ctx =
+               pvr_mmu_op_context_create(vm_ctx->mmu_ctx, sgt, offset, size);
+       err = PTR_ERR_OR_ZERO(bind_op->mmu_op_ctx);
+       if (err) {
+               bind_op->mmu_op_ctx = NULL;
+               goto err_bind_op_fini;
+       }
+
+       bind_op->pvr_obj = pvr_obj;
+       bind_op->vm_ctx = vm_ctx;
+       bind_op->device_addr = device_addr;
+       bind_op->size = size;
+       bind_op->offset = offset;
+
+       return 0;
+
+err_bind_op_fini:
+       pvr_vm_bind_op_fini(bind_op);
+
+       return err;
+}
+
+static int
+pvr_vm_bind_op_unmap_init(struct pvr_vm_bind_op *bind_op,
+                         struct pvr_vm_context *vm_ctx, u64 device_addr,
+                         u64 size)
+{
+       int err;
+
+       if (!pvr_device_addr_and_size_are_valid(device_addr, size))
+               return -EINVAL;
+
+       bind_op->type = PVR_VM_BIND_TYPE_UNMAP;
+
+       bind_op->prev_va = kzalloc(sizeof(*bind_op->prev_va), GFP_KERNEL);
+       bind_op->next_va = kzalloc(sizeof(*bind_op->next_va), GFP_KERNEL);
+       if (!bind_op->prev_va || !bind_op->next_va) {
+               err = -ENOMEM;
+               goto err_bind_op_fini;
+       }
+
+       bind_op->mmu_op_ctx =
+               pvr_mmu_op_context_create(vm_ctx->mmu_ctx, NULL, 0, 0);
+       err = PTR_ERR_OR_ZERO(bind_op->mmu_op_ctx);
+       if (err) {
+               bind_op->mmu_op_ctx = NULL;
+               goto err_bind_op_fini;
+       }
+
+       bind_op->vm_ctx = vm_ctx;
+       bind_op->device_addr = device_addr;
+       bind_op->size = size;
+
+       return 0;
+
+err_bind_op_fini:
+       pvr_vm_bind_op_fini(bind_op);
+
+       return err;
+}
+
+static int
+pvr_vm_bind_op_lock_resvs(struct drm_exec *exec, struct pvr_vm_bind_op *bind_op)
+{
+       drm_exec_until_all_locked(exec) {
+               struct drm_gem_object *r_obj = &bind_op->vm_ctx->dummy_gem;
+               struct drm_gpuvm *gpuvm = &bind_op->vm_ctx->gpuvm_mgr;
+               struct pvr_gem_object *pvr_obj = bind_op->pvr_obj;
+               struct drm_gpuvm_bo *gpuvm_bo;
+
+               /* Acquire lock on the vm_context's reserve object. */
+               int err = drm_exec_lock_obj(exec, r_obj);
+
+               drm_exec_retry_on_contention(exec);
+               if (err)
+                       return err;
+
+               /* Acquire lock on all BOs in the context. */
+               list_for_each_entry(gpuvm_bo, &gpuvm->extobj.list,
+                                   list.entry.extobj) {
+                       err = drm_exec_lock_obj(exec, gpuvm_bo->obj);
+
+                       drm_exec_retry_on_contention(exec);
+                       if (err)
+                               return err;
+               }
+
+               /* Unmap operations don't have an object to lock. */
+               if (!pvr_obj)
+                       break;
+
+               /* Acquire lock on the GEM being mapped. */
+               err = drm_exec_lock_obj(exec,
+                                       gem_from_pvr_gem(bind_op->pvr_obj));
+
+               drm_exec_retry_on_contention(exec);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/**
+ * pvr_vm_gpuva_map() - Insert a mapping into a memory context.
+ * @op: gpuva op containing the remap details.
+ * @op_ctx: Operation context.
+ *
+ * Context: Called by drm_gpuvm_sm_map following a successful mapping while
+ * @op_ctx.vm_ctx mutex is held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_map().
+ */
+static int
+pvr_vm_gpuva_map(struct drm_gpuva_op *op, void *op_ctx)
+{
+       struct pvr_gem_object *pvr_gem = gem_to_pvr_gem(op->map.gem.obj);
+       struct pvr_vm_bind_op *ctx = op_ctx;
+       int err;
+
+       if ((op->map.gem.offset | op->map.va.range) & ~PVR_DEVICE_PAGE_MASK)
+               return -EINVAL;
+
+       err = pvr_mmu_map(ctx->mmu_op_ctx, op->map.va.range, pvr_gem->flags,
+                         op->map.va.addr);
+       if (err)
+               return err;
+
+       drm_gpuva_map(&ctx->vm_ctx->gpuvm_mgr, &ctx->new_va->base, &op->map);
+       drm_gpuva_link(&ctx->new_va->base, ctx->gpuvm_bo);
+       ctx->new_va = NULL;
+
+       return 0;
+}
+
+/**
+ * pvr_vm_gpuva_unmap() - Remove a mapping from a memory context.
+ * @op: gpuva op containing the unmap details.
+ * @op_ctx: Operation context.
+ *
+ * Context: Called by drm_gpuvm_sm_unmap following a successful unmapping while
+ * @op_ctx.vm_ctx mutex is held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_mmu_unmap().
+ */
+static int
+pvr_vm_gpuva_unmap(struct drm_gpuva_op *op, void *op_ctx)
+{
+       struct pvr_vm_bind_op *ctx = op_ctx;
+
+       int err = pvr_mmu_unmap(ctx->mmu_op_ctx, op->unmap.va->va.addr,
+                               op->unmap.va->va.range);
+
+       if (err)
+               return err;
+
+       drm_gpuva_unmap(&op->unmap);
+       drm_gpuva_unlink(op->unmap.va);
+
+       return 0;
+}
+
+/**
+ * pvr_vm_gpuva_remap() - Remap a mapping within a memory context.
+ * @op: gpuva op containing the remap details.
+ * @op_ctx: Operation context.
+ *
+ * Context: Called by either drm_gpuvm_sm_map or drm_gpuvm_sm_unmap when a
+ * mapping or unmapping operation causes a region to be split. The
+ * @op_ctx.vm_ctx mutex is held.
+ *
+ * Return:
+ *  * 0 on success, or
+ *  * Any error returned by pvr_vm_gpuva_unmap() or pvr_vm_gpuva_unmap().
+ */
+static int
+pvr_vm_gpuva_remap(struct drm_gpuva_op *op, void *op_ctx)
+{
+       struct pvr_vm_bind_op *ctx = op_ctx;
+       u64 va_start = 0, va_range = 0;
+       int err;
+
+       drm_gpuva_op_remap_to_unmap_range(&op->remap, &va_start, &va_range);
+       err = pvr_mmu_unmap(ctx->mmu_op_ctx, va_start, va_range);
+       if (err)
+               return err;
+
+       /* No actual remap required: the page table tree depth is fixed to 3,
+        * and we use 4k page table entries only for now.
+        */
+       drm_gpuva_remap(&ctx->prev_va->base, &ctx->next_va->base, &op->remap);
+
+       if (op->remap.prev) {
+               pvr_gem_object_get(gem_to_pvr_gem(ctx->prev_va->base.gem.obj));
+               drm_gpuva_link(&ctx->prev_va->base, ctx->gpuvm_bo);
+               ctx->prev_va = NULL;
+       }
+
+       if (op->remap.next) {
+               pvr_gem_object_get(gem_to_pvr_gem(ctx->next_va->base.gem.obj));
+               drm_gpuva_link(&ctx->next_va->base, ctx->gpuvm_bo);
+               ctx->next_va = NULL;
+       }
+
+       drm_gpuva_unlink(op->remap.unmap->va);
+
+       return 0;
+}
+
+/*
+ * Public API
+ *
+ * For an overview of these functions, see *DOC: Public API* in "pvr_vm.h".
+ */
+
+/**
+ * pvr_device_addr_is_valid() - Tests whether a device-virtual address
+ *                              is valid.
+ * @device_addr: Virtual device address to test.
+ *
+ * Return:
+ *  * %true if @device_addr is within the valid range for a device page
+ *    table and is aligned to the device page size, or
+ *  * %false otherwise.
+ */
+bool
+pvr_device_addr_is_valid(u64 device_addr)
+{
+       return (device_addr & ~PVR_PAGE_TABLE_ADDR_MASK) == 0 &&
+              (device_addr & ~PVR_DEVICE_PAGE_MASK) == 0;
+}
+
+/**
+ * pvr_device_addr_and_size_are_valid() - Tests whether a device-virtual
+ * address and associated size are both valid.
+ * @device_addr: Virtual device address to test.
+ * @size: Size of the range based at @device_addr to test.
+ *
+ * Calling pvr_device_addr_is_valid() twice (once on @size, and again on
+ * @device_addr + @size) to verify a device-virtual address range initially
+ * seems intuitive, but it produces a false-negative when the address range
+ * is right at the end of device-virtual address space.
+ *
+ * This function catches that corner case, as well as checking that
+ * @size is non-zero.
+ *
+ * Return:
+ *  * %true if @device_addr is device page aligned; @size is device page
+ *    aligned; the range specified by @device_addr and @size is within the
+ *    bounds of the device-virtual address space, and @size is non-zero, or
+ *  * %false otherwise.
+ */
+bool
+pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size)
+{
+       return pvr_device_addr_is_valid(device_addr) &&
+              size != 0 && (size & ~PVR_DEVICE_PAGE_MASK) == 0 &&
+              (device_addr + size <= PVR_PAGE_TABLE_ADDR_SPACE_SIZE);
+}
+
+void pvr_gpuvm_free(struct drm_gpuvm *gpuvm)
+{
+
+}
+
+static const struct drm_gpuvm_ops pvr_vm_gpuva_ops = {
+       .vm_free = pvr_gpuvm_free,
+       .sm_step_map = pvr_vm_gpuva_map,
+       .sm_step_remap = pvr_vm_gpuva_remap,
+       .sm_step_unmap = pvr_vm_gpuva_unmap,
+};
+
+/**
+ * pvr_vm_create_context() - Create a new VM context.
+ * @pvr_dev: Target PowerVR device.
+ * @is_userspace_context: %true if this context is for userspace. This will
+ *                        create a firmware memory context for the VM context
+ *                        and disable warnings when tearing down mappings.
+ *
+ * Return:
+ *  * A handle to the newly-minted VM context on success,
+ *  * -%EINVAL if the feature "virtual address space bits" on @pvr_dev is
+ *    missing or has an unsupported value,
+ *  * -%ENOMEM if allocation of the structure behind the opaque handle fails,
+ *    or
+ *  * Any error encountered while setting up internal structures.
+ */
+struct pvr_vm_context *
+pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context)
+{
+       struct drm_device *drm_dev = from_pvr_device(pvr_dev);
+
+       struct pvr_vm_context *vm_ctx;
+       u16 device_addr_bits;
+
+       int err;
+
+       err = PVR_FEATURE_VALUE(pvr_dev, virtual_address_space_bits,
+                               &device_addr_bits);
+       if (err) {
+               drm_err(drm_dev,
+                       "Failed to get device virtual address space bits\n");
+               return ERR_PTR(err);
+       }
+
+       if (device_addr_bits != PVR_PAGE_TABLE_ADDR_BITS) {
+               drm_err(drm_dev,
+                       "Device has unsupported virtual address space size\n");
+               return ERR_PTR(-EINVAL);
+       }
+
+       vm_ctx = kzalloc(sizeof(*vm_ctx), GFP_KERNEL);
+       if (!vm_ctx)
+               return ERR_PTR(-ENOMEM);
+
+       drm_gem_private_object_init(&pvr_dev->base, &vm_ctx->dummy_gem, 0);
+
+       vm_ctx->pvr_dev = pvr_dev;
+       kref_init(&vm_ctx->ref_count);
+       mutex_init(&vm_ctx->lock);
+
+       drm_gpuvm_init(&vm_ctx->gpuvm_mgr,
+                      is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM",
+                      0, &pvr_dev->base, &vm_ctx->dummy_gem,
+                      0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops);
+
+       vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev);
+       err = PTR_ERR_OR_ZERO(&vm_ctx->mmu_ctx);
+       if (err) {
+               vm_ctx->mmu_ctx = NULL;
+               goto err_put_ctx;
+       }
+
+       if (is_userspace_context) {
+               /* TODO: Create FW mem context */
+               err = -ENODEV;
+               goto err_put_ctx;
+       }
+
+       return vm_ctx;
+
+err_put_ctx:
+       pvr_vm_context_put(vm_ctx);
+
+       return ERR_PTR(err);
+}
+
+/**
+ * pvr_vm_context_release() - Teardown a VM context.
+ * @ref_count: Pointer to reference counter of the VM context.
+ *
+ * This function ensures that no mappings are left dangling by unmapping them
+ * all in order of ascending device-virtual address.
+ */
+static void
+pvr_vm_context_release(struct kref *ref_count)
+{
+       struct pvr_vm_context *vm_ctx =
+               container_of(ref_count, struct pvr_vm_context, ref_count);
+
+       /* TODO: Destroy FW mem context */
+       WARN_ON(vm_ctx->fw_mem_ctx_obj);
+
+       WARN_ON(pvr_vm_unmap(vm_ctx, vm_ctx->gpuvm_mgr.mm_start,
+                            vm_ctx->gpuvm_mgr.mm_range));
+
+       drm_gpuvm_put(&vm_ctx->gpuvm_mgr);
+       pvr_mmu_context_destroy(vm_ctx->mmu_ctx);
+       drm_gem_private_object_fini(&vm_ctx->dummy_gem);
+       mutex_destroy(&vm_ctx->lock);
+
+       kfree(vm_ctx);
+}
+
+/**
+ * pvr_vm_context_lookup() - Look up VM context from handle
+ * @pvr_file: Pointer to pvr_file structure.
+ * @handle: Object handle.
+ *
+ * Takes reference on VM context object. Call pvr_vm_context_put() to release.
+ *
+ * Returns:
+ *  * The requested object on success, or
+ *  * %NULL on failure (object does not exist in list, or is not a VM context)
+ */
+struct pvr_vm_context *
+pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle)
+{
+       struct pvr_vm_context *vm_ctx;
+
+       xa_lock(&pvr_file->vm_ctx_handles);
+       vm_ctx = xa_load(&pvr_file->vm_ctx_handles, handle);
+       if (vm_ctx)
+               kref_get(&vm_ctx->ref_count);
+
+       xa_unlock(&pvr_file->vm_ctx_handles);
+
+       return vm_ctx;
+}
+
+/**
+ * pvr_vm_context_put() - Release a reference on a VM context
+ * @vm_ctx: Target VM context.
+ *
+ * Returns:
+ *  * %true if the VM context was destroyed, or
+ *  * %false if there are any references still remaining.
+ */
+bool
+pvr_vm_context_put(struct pvr_vm_context *vm_ctx)
+{
+       if (vm_ctx)
+               return kref_put(&vm_ctx->ref_count, pvr_vm_context_release);
+
+       return true;
+}
+
+/**
+ * pvr_destroy_vm_contexts_for_file: Destroy any VM contexts associated with the
+ * given file.
+ * @pvr_file: Pointer to pvr_file structure.
+ *
+ * Removes all vm_contexts associated with @pvr_file from the device VM context
+ * list and drops initial references. vm_contexts will then be destroyed once
+ * all outstanding references are dropped.
+ */
+void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file)
+{
+       struct pvr_vm_context *vm_ctx;
+       unsigned long handle;
+
+       xa_for_each(&pvr_file->vm_ctx_handles, handle, vm_ctx) {
+               /* vm_ctx is not used here because that would create a race with xa_erase */
+               pvr_vm_context_put(xa_erase(&pvr_file->vm_ctx_handles, handle));
+       }
+}
+
+/**
+ * pvr_vm_map() - Map a section of physical memory into a section of
+ * device-virtual memory.
+ * @vm_ctx: Target VM context.
+ * @pvr_obj: Target PowerVR memory object.
+ * @pvr_obj_offset: Offset into @pvr_obj to map from.
+ * @device_addr: Virtual device address at the start of the requested mapping.
+ * @size: Size of the requested mapping.
+ *
+ * No handle is returned to represent the mapping. Instead, callers should
+ * remember @device_addr and use that as a handle.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
+ *    address; the region specified by @pvr_obj_offset and @size does not fall
+ *    entirely within @pvr_obj, or any part of the specified region of @pvr_obj
+ *    is not device-virtual page-aligned,
+ *  * Any error encountered while performing internal operations required to
+ *    destroy the mapping (returned from pvr_vm_gpuva_map or
+ *    pvr_vm_gpuva_remap).
+ */
+int
+pvr_vm_map(struct pvr_vm_context *vm_ctx, struct pvr_gem_object *pvr_obj,
+          u64 pvr_obj_offset, u64 device_addr, u64 size)
+{
+       struct pvr_vm_bind_op bind_op = {0};
+       struct drm_exec exec;
+
+       int err = pvr_vm_bind_op_map_init(&bind_op, vm_ctx, pvr_obj,
+                                         pvr_obj_offset, device_addr,
+                                         size);
+
+       if (err)
+               return err;
+
+       drm_exec_init(&exec,
+                     DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
+
+       pvr_gem_object_get(pvr_obj);
+
+       err = pvr_vm_bind_op_lock_resvs(&exec, &bind_op);
+       if (err)
+               goto err_cleanup;
+
+       err = pvr_vm_bind_op_exec(&bind_op);
+
+       drm_exec_fini(&exec);
+
+err_cleanup:
+       pvr_vm_bind_op_fini(&bind_op);
+
+       return err;
+}
+
+/**
+ * pvr_vm_unmap() - Unmap an already mapped section of device-virtual memory.
+ * @vm_ctx: Target VM context.
+ * @device_addr: Virtual device address at the start of the target mapping.
+ * @size: Size of the target mapping.
+ *
+ * Return:
+ *  * 0 on success,
+ *  * -%EINVAL if @device_addr is not a valid page-aligned device-virtual
+ *    address,
+ *  * Any error encountered while performing internal operations required to
+ *    destroy the mapping (returned from pvr_vm_gpuva_unmap or
+ *    pvr_vm_gpuva_remap).
+ */
+int
+pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size)
+{
+       struct pvr_vm_bind_op bind_op = {0};
+       struct drm_exec exec;
+
+       int err = pvr_vm_bind_op_unmap_init(&bind_op, vm_ctx, device_addr,
+                                           size);
+
+       if (err)
+               return err;
+
+       drm_exec_init(&exec,
+                     DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
+
+       err = pvr_vm_bind_op_lock_resvs(&exec, &bind_op);
+       if (err)
+               goto err_cleanup;
+
+       err = pvr_vm_bind_op_exec(&bind_op);
+
+       drm_exec_fini(&exec);
+
+err_cleanup:
+       pvr_vm_bind_op_fini(&bind_op);
+
+       return err;
+}
+
+/* Static data areas are determined by firmware. */
+static const struct drm_pvr_static_data_area static_data_areas[] = {
+       {
+               .area_usage = DRM_PVR_STATIC_DATA_AREA_FENCE,
+               .location_heap_id = DRM_PVR_HEAP_GENERAL,
+               .offset = 0,
+               .size = 128,
+       },
+       {
+               .area_usage = DRM_PVR_STATIC_DATA_AREA_YUV_CSC,
+               .location_heap_id = DRM_PVR_HEAP_GENERAL,
+               .offset = 128,
+               .size = 1024,
+       },
+       {
+               .area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
+               .location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
+               .offset = 0,
+               .size = 128,
+       },
+       {
+               .area_usage = DRM_PVR_STATIC_DATA_AREA_EOT,
+               .location_heap_id = DRM_PVR_HEAP_PDS_CODE_DATA,
+               .offset = 128,
+               .size = 128,
+       },
+       {
+               .area_usage = DRM_PVR_STATIC_DATA_AREA_VDM_SYNC,
+               .location_heap_id = DRM_PVR_HEAP_USC_CODE,
+               .offset = 0,
+               .size = 128,
+       },
+};
+
+#define GET_RESERVED_SIZE(last_offset, last_size) round_up((last_offset) + (last_size), PAGE_SIZE)
+
+/*
+ * The values given to GET_RESERVED_SIZE() are taken from the last entry in the corresponding
+ * static data area for each heap.
+ */
+static const struct drm_pvr_heap pvr_heaps[] = {
+       [DRM_PVR_HEAP_GENERAL] = {
+               .base = ROGUE_GENERAL_HEAP_BASE,
+               .size = ROGUE_GENERAL_HEAP_SIZE,
+               .flags = 0,
+               .page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+       },
+       [DRM_PVR_HEAP_PDS_CODE_DATA] = {
+               .base = ROGUE_PDSCODEDATA_HEAP_BASE,
+               .size = ROGUE_PDSCODEDATA_HEAP_SIZE,
+               .flags = 0,
+               .page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+       },
+       [DRM_PVR_HEAP_USC_CODE] = {
+               .base = ROGUE_USCCODE_HEAP_BASE,
+               .size = ROGUE_USCCODE_HEAP_SIZE,
+               .flags = 0,
+               .page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+       },
+       [DRM_PVR_HEAP_RGNHDR] = {
+               .base = ROGUE_RGNHDR_HEAP_BASE,
+               .size = ROGUE_RGNHDR_HEAP_SIZE,
+               .flags = 0,
+               .page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+       },
+       [DRM_PVR_HEAP_VIS_TEST] = {
+               .base = ROGUE_VISTEST_HEAP_BASE,
+               .size = ROGUE_VISTEST_HEAP_SIZE,
+               .flags = 0,
+               .page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+       },
+       [DRM_PVR_HEAP_TRANSFER_FRAG] = {
+               .base = ROGUE_TRANSFER_FRAG_HEAP_BASE,
+               .size = ROGUE_TRANSFER_FRAG_HEAP_SIZE,
+               .flags = 0,
+               .page_size_log2 = PVR_DEVICE_PAGE_SHIFT,
+       },
+};
+
+int
+pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
+                         struct drm_pvr_ioctl_dev_query_args *args)
+{
+       struct drm_pvr_dev_query_static_data_areas query = {0};
+       int err;
+
+       if (!args->pointer) {
+               args->size = sizeof(struct drm_pvr_dev_query_static_data_areas);
+               return 0;
+       }
+
+       err = PVR_UOBJ_GET(query, args->size, args->pointer);
+       if (err < 0)
+               return err;
+
+       if (!query.static_data_areas.array) {
+               query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
+               query.static_data_areas.stride = sizeof(struct drm_pvr_static_data_area);
+               goto copy_out;
+       }
+
+       if (query.static_data_areas.count > ARRAY_SIZE(static_data_areas))
+               query.static_data_areas.count = ARRAY_SIZE(static_data_areas);
+
+       err = PVR_UOBJ_SET_ARRAY(&query.static_data_areas, static_data_areas);
+       if (err < 0)
+               return err;
+
+copy_out:
+       err = PVR_UOBJ_SET(args->pointer, args->size, query);
+       if (err < 0)
+               return err;
+
+       args->size = sizeof(query);
+       return 0;
+}
+
+int
+pvr_heap_info_get(const struct pvr_device *pvr_dev,
+                 struct drm_pvr_ioctl_dev_query_args *args)
+{
+       struct drm_pvr_dev_query_heap_info query = {0};
+       u64 dest;
+       int err;
+
+       if (!args->pointer) {
+               args->size = sizeof(struct drm_pvr_dev_query_heap_info);
+               return 0;
+       }
+
+       err = PVR_UOBJ_GET(query, args->size, args->pointer);
+       if (err < 0)
+               return err;
+
+       if (!query.heaps.array) {
+               query.heaps.count = ARRAY_SIZE(pvr_heaps);
+               query.heaps.stride = sizeof(struct drm_pvr_heap);
+               goto copy_out;
+       }
+
+       if (query.heaps.count > ARRAY_SIZE(pvr_heaps))
+               query.heaps.count = ARRAY_SIZE(pvr_heaps);
+
+       /* Region header heap is only present if BRN63142 is present. */
+       dest = query.heaps.array;
+       for (size_t i = 0; i < query.heaps.count; i++) {
+               struct drm_pvr_heap heap = pvr_heaps[i];
+
+               if (i == DRM_PVR_HEAP_RGNHDR && !PVR_HAS_QUIRK(pvr_dev, 63142))
+                       heap.size = 0;
+
+               err = PVR_UOBJ_SET(dest, query.heaps.stride, heap);
+               if (err < 0)
+                       return err;
+
+               dest += query.heaps.stride;
+       }
+
+copy_out:
+       err = PVR_UOBJ_SET(args->pointer, args->size, query);
+       if (err < 0)
+               return err;
+
+       args->size = sizeof(query);
+       return 0;
+}
+
+/**
+ * pvr_heap_contains_range() - Determine if a given heap contains the specified
+ *                             device-virtual address range.
+ * @pvr_heap: Target heap.
+ * @start: Inclusive start of the target range.
+ * @end: Inclusive end of the target range.
+ *
+ * It is an error to call this function with values of @start and @end that do
+ * not satisfy the condition @start <= @end.
+ */
+static __always_inline bool
+pvr_heap_contains_range(const struct drm_pvr_heap *pvr_heap, u64 start, u64 end)
+{
+       return pvr_heap->base <= start && end < pvr_heap->base + pvr_heap->size;
+}
+
+/**
+ * pvr_find_heap_containing() - Find a heap which contains the specified
+ *                              device-virtual address range.
+ * @pvr_dev: Target PowerVR device.
+ * @start: Start of the target range.
+ * @size: Size of the target range.
+ *
+ * Return:
+ *  * A pointer to a constant instance of struct drm_pvr_heap representing the
+ *    heap containing the entire range specified by @start and @size on
+ *    success, or
+ *  * %NULL if no such heap exists.
+ */
+const struct drm_pvr_heap *
+pvr_find_heap_containing(struct pvr_device *pvr_dev, u64 start, u64 size)
+{
+       u64 end;
+
+       if (check_add_overflow(start, size - 1, &end))
+               return NULL;
+
+       /*
+        * There are no guarantees about the order of address ranges in
+        * &pvr_heaps, so iterate over the entire array for a heap whose
+        * range completely encompasses the given range.
+        */
+       for (u32 heap_id = 0; heap_id < ARRAY_SIZE(pvr_heaps); heap_id++) {
+               /* Filter heaps that present only with an associated quirk */
+               if (heap_id == DRM_PVR_HEAP_RGNHDR &&
+                   !PVR_HAS_QUIRK(pvr_dev, 63142)) {
+                       continue;
+               }
+
+               if (pvr_heap_contains_range(&pvr_heaps[heap_id], start, end))
+                       return &pvr_heaps[heap_id];
+       }
+
+       return NULL;
+}
+
+/**
+ * pvr_vm_find_gem_object() - Look up a buffer object from a given
+ *                            device-virtual address.
+ * @vm_ctx: [IN] Target VM context.
+ * @device_addr: [IN] Virtual device address at the start of the required
+ *               object.
+ * @mapped_offset_out: [OUT] Pointer to location to write offset of the start
+ *                     of the mapped region within the buffer object. May be
+ *                     %NULL if this information is not required.
+ * @mapped_size_out: [OUT] Pointer to location to write size of the mapped
+ *                   region. May be %NULL if this information is not required.
+ *
+ * If successful, a reference will be taken on the buffer object. The caller
+ * must drop the reference with pvr_gem_object_put().
+ *
+ * Return:
+ *  * The PowerVR buffer object mapped at @device_addr if one exists, or
+ *  * %NULL otherwise.
+ */
+struct pvr_gem_object *
+pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx, u64 device_addr,
+                      u64 *mapped_offset_out, u64 *mapped_size_out)
+{
+       struct pvr_gem_object *pvr_obj;
+       struct drm_gpuva *va;
+
+       mutex_lock(&vm_ctx->lock);
+
+       va = drm_gpuva_find_first(&vm_ctx->gpuvm_mgr, device_addr, 1);
+       if (!va)
+               goto err_unlock;
+
+       pvr_obj = gem_to_pvr_gem(va->gem.obj);
+       pvr_gem_object_get(pvr_obj);
+
+       if (mapped_offset_out)
+               *mapped_offset_out = va->gem.offset;
+       if (mapped_size_out)
+               *mapped_size_out = va->va.range;
+
+       mutex_unlock(&vm_ctx->lock);
+
+       return pvr_obj;
+
+err_unlock:
+       mutex_unlock(&vm_ctx->lock);
+
+       return NULL;
+}
+
+/**
+ * pvr_vm_get_fw_mem_context: Get object representing firmware memory context
+ * @vm_ctx: Target VM context.
+ *
+ * Returns:
+ *  * FW object representing firmware memory context, or
+ *  * %NULL if this VM context does not have a firmware memory context.
+ */
+struct pvr_fw_object *
+pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx)
+{
+       return vm_ctx->fw_mem_ctx_obj;
+}
diff --git a/drivers/gpu/drm/imagination/pvr_vm.h b/drivers/gpu/drm/imagination/pvr_vm.h
new file mode 100644 (file)
index 0000000..cf8b975
--- /dev/null
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/* Copyright (c) 2023 Imagination Technologies Ltd. */
+
+#ifndef PVR_VM_H
+#define PVR_VM_H
+
+#include "pvr_rogue_mmu_defs.h"
+
+#include <uapi/drm/pvr_drm.h>
+
+#include <linux/types.h>
+
+/* Forward declaration from "pvr_device.h" */
+struct pvr_device;
+struct pvr_file;
+
+/* Forward declaration from "pvr_gem.h" */
+struct pvr_gem_object;
+
+/* Forward declaration from "pvr_vm.c" */
+struct pvr_vm_context;
+
+/* Forward declaration from <uapi/drm/pvr_drm.h> */
+struct drm_pvr_ioctl_get_heap_info_args;
+
+/* Forward declaration from <drm/drm_exec.h> */
+struct drm_exec;
+
+/* Functions defined in pvr_vm.c */
+
+bool pvr_device_addr_is_valid(u64 device_addr);
+bool pvr_device_addr_and_size_are_valid(u64 device_addr, u64 size);
+
+struct pvr_vm_context *pvr_vm_create_context(struct pvr_device *pvr_dev,
+                                            bool is_userspace_context);
+
+int pvr_vm_map(struct pvr_vm_context *vm_ctx,
+              struct pvr_gem_object *pvr_obj, u64 pvr_obj_offset,
+              u64 device_addr, u64 size);
+int pvr_vm_unmap(struct pvr_vm_context *vm_ctx, u64 device_addr, u64 size);
+
+dma_addr_t pvr_vm_get_page_table_root_addr(struct pvr_vm_context *vm_ctx);
+struct dma_resv *pvr_vm_get_dma_resv(struct pvr_vm_context *vm_ctx);
+
+int pvr_static_data_areas_get(const struct pvr_device *pvr_dev,
+                             struct drm_pvr_ioctl_dev_query_args *args);
+int pvr_heap_info_get(const struct pvr_device *pvr_dev,
+                     struct drm_pvr_ioctl_dev_query_args *args);
+const struct drm_pvr_heap *pvr_find_heap_containing(struct pvr_device *pvr_dev,
+                                                   u64 addr, u64 size);
+
+struct pvr_gem_object *pvr_vm_find_gem_object(struct pvr_vm_context *vm_ctx,
+                                             u64 device_addr,
+                                             u64 *mapped_offset_out,
+                                             u64 *mapped_size_out);
+
+struct pvr_fw_object *
+pvr_vm_get_fw_mem_context(struct pvr_vm_context *vm_ctx);
+
+struct pvr_vm_context *pvr_vm_context_lookup(struct pvr_file *pvr_file, u32 handle);
+struct pvr_vm_context *pvr_vm_context_get(struct pvr_vm_context *vm_ctx);
+bool pvr_vm_context_put(struct pvr_vm_context *vm_ctx);
+void pvr_destroy_vm_contexts_for_file(struct pvr_file *pvr_file);
+
+#endif /* PVR_VM_H */