xe_pt.o \
xe_pt_walk.o \
xe_pxp.o \
+ xe_pxp_submit.o \
xe_query.o \
xe_range_fence.o \
xe_reg_sr.o \
#ifndef _ABI_GSC_PXP_COMMANDS_ABI_H
#define _ABI_GSC_PXP_COMMANDS_ABI_H
+#include <linux/sizes.h>
#include <linux/types.h>
/* Heci client ID for PXP commands */
#define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF))
+/*
+ * A PXP sub-section in an HECI packet can be up to 64K in size in each
+ * direction. This does not include the top-level GSC header.
+ */
+#define PXP_MAX_PACKET_SIZE SZ_64K
+
/*
* there are a lot of status codes for PXP, but we only define the cross-API
* common ones that we actually can handle in the kernel driver. Other failure
struct xe_exec_queue *q;
int err;
+ /* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
+ xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));
+
q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
extensions);
if (IS_ERR(q))
#include "xe_gt.h"
#include "xe_gt_types.h"
#include "xe_mmio.h"
+#include "xe_pxp_submit.h"
#include "xe_pxp_types.h"
#include "xe_uc_fw.h"
#include "regs/xe_pxp_regs.h"
return kcr_pxp_set_status(pxp, true);
}
+static int kcr_pxp_disable(const struct xe_pxp *pxp)
+{
+ return kcr_pxp_set_status(pxp, false);
+}
+
+static void pxp_fini(void *arg)
+{
+ struct xe_pxp *pxp = arg;
+
+ xe_pxp_destroy_execution_resources(pxp);
+
+ /* no need to explicitly disable KCR since we're going to do an FLR */
+}
+
/**
* xe_pxp_init - initialize PXP support
* @xe: the xe_device structure
if (err)
goto out_free;
+ err = xe_pxp_allocate_execution_resources(pxp);
+ if (err)
+ goto out_kcr_disable;
+
xe->pxp = pxp;
- return 0;
+ return devm_add_action_or_reset(xe->drm.dev, pxp_fini, pxp);
+out_kcr_disable:
+ kcr_pxp_disable(pxp);
out_free:
drmm_kfree(&xe->drm, pxp);
return err;
--- /dev/null
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright(c) 2024 Intel Corporation.
+ */
+
+#include "xe_pxp_submit.h"
+
+#include <uapi/drm/xe_drm.h>
+
+#include "xe_device_types.h"
+#include "xe_bo.h"
+#include "xe_exec_queue.h"
+#include "xe_gsc_submit.h"
+#include "xe_gt.h"
+#include "xe_pxp_types.h"
+#include "xe_vm.h"
+
+/*
+ * The VCS is used for kernel-owned GGTT submissions to issue key terminations.
+ * Terminations are serialized, so we only need a single queue and a single
+ * batch.
+ */
+static int allocate_vcs_execution_resources(struct xe_pxp *pxp)
+{
+ struct xe_gt *gt = pxp->gt;
+ struct xe_device *xe = pxp->xe;
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_hw_engine *hwe;
+ struct xe_exec_queue *q;
+ struct xe_bo *bo;
+ int err;
+
+ hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_VIDEO_DECODE, 0, true);
+ if (!hwe)
+ return -ENODEV;
+
+ q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, hwe,
+ EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT, 0);
+ if (IS_ERR(q))
+ return PTR_ERR(q);
+
+ /*
+ * Each termination is 16 DWORDS, so 4K is enough to contain a
+ * termination for each session.
+ */
+ bo = xe_bo_create_pin_map(xe, tile, 0, SZ_4K, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ goto out_queue;
+ }
+
+ pxp->vcs_exec.q = q;
+ pxp->vcs_exec.bo = bo;
+
+ return 0;
+
+out_queue:
+ xe_exec_queue_put(q);
+ return err;
+}
+
+static void destroy_vcs_execution_resources(struct xe_pxp *pxp)
+{
+ if (pxp->vcs_exec.bo)
+ xe_bo_unpin_map_no_vm(pxp->vcs_exec.bo);
+
+ if (pxp->vcs_exec.q)
+ xe_exec_queue_put(pxp->vcs_exec.q);
+}
+
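+/*
+ * The GSCCS is used for kernel-owned PPGTT submissions to the GSC FW. Each
+ * client uses a single BO containing, in order, the batch buffer, the input
+ * message and the output message:
+ *
+ *   [ batch (PXP_BB_SIZE) | msg_in (inout_size) | msg_out (inout_size) ]
+ */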
+#define PXP_BB_SIZE XE_PAGE_SIZE
+static int allocate_gsc_client_resources(struct xe_gt *gt,
+ struct xe_pxp_gsc_client_resources *gsc_res,
+ size_t inout_size)
+{
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_hw_engine *hwe;
+ struct xe_vm *vm;
+ struct xe_bo *bo;
+ struct xe_exec_queue *q;
+ struct dma_fence *fence;
+ long timeout;
+ int err = 0;
+
+ hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
+
+ /* we shouldn't reach here if the GSC engine is not available */
+ xe_assert(xe, hwe);
+
+ /* PXP instructions must be issued from PPGTT */
+ vm = xe_vm_create(xe, XE_VM_FLAG_GSC);
+ if (IS_ERR(vm))
+ return PTR_ERR(vm);
+
+ /* We allocate a single object for the batch and the in/out memory */
+ xe_vm_lock(vm, false);
+ bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC);
+ xe_vm_unlock(vm);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ goto vm_out;
+ }
+
+ fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto bo_out;
+ }
+
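+ /* the bind is done synchronously, so just wait on its fence (up to 1s) */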
+ timeout = dma_fence_wait_timeout(fence, false, HZ);
+ dma_fence_put(fence);
+ if (timeout <= 0) {
+ err = timeout ?: -ETIME;
+ goto bo_out;
+ }
+
+ q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, hwe,
+ EXEC_QUEUE_FLAG_KERNEL |
+ EXEC_QUEUE_FLAG_PERMANENT, 0);
+ if (IS_ERR(q)) {
+ err = PTR_ERR(q);
+ goto bo_out;
+ }
+
+ gsc_res->vm = vm;
+ gsc_res->bo = bo;
+ gsc_res->inout_size = inout_size;
+ gsc_res->batch = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0);
+ gsc_res->msg_in = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_SIZE);
+ gsc_res->msg_out = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_SIZE + inout_size);
+ gsc_res->q = q;
+
+ /* initialize host-session-handle (for all Xe-to-gsc-firmware PXP cmds) */
+ gsc_res->host_session_handle = xe_gsc_create_host_session_id();
+
+ return 0;
+
+bo_out:
+ xe_bo_unpin_map_no_vm(bo);
+vm_out:
+ xe_vm_close_and_put(vm);
+
+ return err;
+}
+
+static void destroy_gsc_client_resources(struct xe_pxp_gsc_client_resources *gsc_res)
+{
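+ /*
+ * gsc_res is only populated once all the allocations have succeeded, so a
+ * NULL queue means there is nothing to clean up.
+ */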
+ if (!gsc_res->q)
+ return;
+
+ xe_exec_queue_put(gsc_res->q);
+ xe_bo_unpin_map_no_vm(gsc_res->bo);
+ xe_vm_close_and_put(gsc_res->vm);
+}
+
+/**
+ * xe_pxp_allocate_execution_resources - Allocate PXP submission objects
+ * @pxp: the xe_pxp structure
+ *
+ * Allocates the exec_queue objects for VCS and GSCCS submission. The GSCCS
+ * submissions are done via PPGTT, so this function also creates a VM and maps
+ * the submission BO into it.
+ *
+ * Returns 0 if the allocation and mapping are successful, an errno value
+ * otherwise.
+ */
+int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp)
+{
+ int err;
+
+ err = allocate_vcs_execution_resources(pxp);
+ if (err)
+ return err;
+
+ /*
+ * PXP commands can require a lot of BO space (see PXP_MAX_PACKET_SIZE),
+ * but we currently only support a subset of commands that are small
+ * (< 20 dwords), so a single page is enough for now.
+ */
+ err = allocate_gsc_client_resources(pxp->gt, &pxp->gsc_res, XE_PAGE_SIZE);
+ if (err)
+ goto destroy_vcs_context;
+
+ return 0;
+
+destroy_vcs_context:
+ destroy_vcs_execution_resources(pxp);
+ return err;
+}
+
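+/**
+ * xe_pxp_destroy_execution_resources - Cleanup the PXP submission objects
+ * @pxp: the xe_pxp structure
+ *
+ * Releases the GSCCS client resources and the VCS submission objects allocated
+ * by xe_pxp_allocate_execution_resources().
+ */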
+void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp)
+{
+ destroy_gsc_client_resources(&pxp->gsc_res);
+ destroy_vcs_execution_resources(pxp);
+}
--- /dev/null
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2024, Intel Corporation. All rights reserved.
+ */
+
+#ifndef __XE_PXP_SUBMIT_H__
+#define __XE_PXP_SUBMIT_H__
+
+struct xe_pxp;
+
+int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp);
+void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp);
+
+#endif /* __XE_PXP_SUBMIT_H__ */
#ifndef __XE_PXP_TYPES_H__
#define __XE_PXP_TYPES_H__
+#include <linux/iosys-map.h>
+#include <linux/types.h>
+
+struct xe_bo;
+struct xe_exec_queue;
struct xe_device;
struct xe_gt;
+struct xe_vm;
+
+/**
+ * struct xe_pxp_gsc_client_resources - resources for GSC submission by a PXP
+ * client. The GSC FW supports multiple GSC clients active at the same time.
+ */
+struct xe_pxp_gsc_client_resources {
+ /**
+ * @host_session_handle: handle used to identify the client in messages
+ * sent to the GSC firmware.
+ */
+ u64 host_session_handle;
+ /** @vm: VM used for PXP submissions to the GSCCS */
+ struct xe_vm *vm;
+ /** @q: GSCCS exec queue for PXP submissions */
+ struct xe_exec_queue *q;
+
+ /**
+ * @bo: BO used for submissions to the GSCCS and GSC FW. It includes
+ * space for the GSCCS batch and the input/output buffers read/written
+ * by the FW
+ */
+ struct xe_bo *bo;
+ /** @inout_size: size of each of the msg_in/out sections individually */
+ u32 inout_size;
+ /** @batch: iosys_map to the batch memory within the BO */
+ struct iosys_map batch;
+ /** @msg_in: iosys_map to the input memory within the BO */
+ struct iosys_map msg_in;
+ /** @msg_out: iosys_map to the output memory within the BO */
+ struct iosys_map msg_out;
+};
/**
* struct xe_pxp - pxp state
* (VDBOX, KCR and GSC)
*/
struct xe_gt *gt;
+
+ /** @vcs_exec: kernel-owned objects for PXP submissions to the VCS */
+ struct {
+ /** @vcs_exec.q: kernel-owned VCS exec queue used for PXP terminations */
+ struct xe_exec_queue *q;
+ /** @vcs_exec.bo: BO used for submissions to the VCS */
+ struct xe_bo *bo;
+ } vcs_exec;
+
+ /** @gsc_res: kernel-owned objects for PXP submissions to the GSCCS */
+ struct xe_pxp_gsc_client_resources gsc_res;
};
#endif /* __XE_PXP_TYPES_H__ */
struct xe_tile *tile;
u8 id;
+ /*
+ * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
+ * ever be in faulting mode.
+ */
+ xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
+
vm = kzalloc(sizeof(*vm), GFP_KERNEL);
if (!vm)
return ERR_PTR(-ENOMEM);
vm->flags = flags;
- init_rwsem(&vm->lock);
+ /*
+ * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
+ * manipulated under the PXP mutex. However, the PXP mutex can be taken
+ * under a user-VM lock when the PXP session is started at exec_queue
+ * creation time. Those are different VMs and therefore there is no risk
+ * of deadlock, but we need to tell lockdep that this is the case or it
+ * will print a warning.
+ */
+ if (flags & XE_VM_FLAG_GSC) {
+ static struct lock_class_key gsc_vm_key;
+
+ __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
+ } else {
+ init_rwsem(&vm->lock);
+ }
mutex_init(&vm->snap_mutex);
INIT_LIST_HEAD(&vm->rebind_list);
for (i = 0; i < vops->num_syncs; i++)
xe_sync_entry_signal(vops->syncs + i, fence);
xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
- dma_fence_put(fence);
}
-static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
- struct xe_vma_ops *vops)
+static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
+ struct xe_vma_ops *vops)
{
struct drm_exec exec;
struct dma_fence *fence;
drm_exec_until_all_locked(&exec) {
err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
drm_exec_retry_on_contention(&exec);
- if (err)
+ if (err) {
+ fence = ERR_PTR(err);
goto unlock;
+ }
fence = ops_execute(vm, vops);
- if (IS_ERR(fence)) {
- err = PTR_ERR(fence);
+ if (IS_ERR(fence))
goto unlock;
- }
vm_bind_ioctl_ops_fini(vm, vops, fence);
}
unlock:
drm_exec_fini(&exec);
- return err;
+ return fence;
}
ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
struct xe_sync_entry *syncs = NULL;
struct drm_xe_vm_bind_op *bind_ops;
struct xe_vma_ops vops;
+ struct dma_fence *fence;
int err;
int i;
if (err)
goto unwind_ops;
- err = vm_bind_ioctl_ops_execute(vm, &vops);
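+ /*
+ * The ioctl doesn't need to hold on to the fence: completion is tracked
+ * via the syncs and the queue's last fence, which are handled in
+ * vm_bind_ioctl_ops_fini(), so just drop the reference here.
+ */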
+ fence = vm_bind_ioctl_ops_execute(vm, &vops);
+ if (IS_ERR(fence))
+ err = PTR_ERR(fence);
+ else
+ dma_fence_put(fence);
unwind_ops:
if (err && err != -ENODATA)
return err;
}
+/**
+ * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
+ * @vm: VM to bind the BO to
+ * @bo: BO to bind
+ * @q: exec queue to use for the bind (optional)
+ * @addr: address at which to bind the BO
+ * @cache_lvl: PAT cache level to use
+ *
+ * Execute a VM bind map operation on a kernel-owned BO to bind it into a
+ * kernel-owned VM.
+ *
+ * Returns a dma_fence to track the completion of the bind if the bind job was
+ * successfully submitted, an error pointer otherwise.
+ */
+struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
+ struct xe_exec_queue *q, u64 addr,
+ enum xe_cache_level cache_lvl)
+{
+ struct xe_vma_ops vops;
+ struct drm_gpuva_ops *ops = NULL;
+ struct dma_fence *fence;
+ int err;
+
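+ /* keep the BO, VM and (optional) queue alive for the duration of the bind */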
+ xe_bo_get(bo);
+ xe_vm_get(vm);
+ if (q)
+ xe_exec_queue_get(q);
+
+ down_write(&vm->lock);
+
+ xe_vma_ops_init(&vops, vm, q, NULL, 0);
+
+ ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
+ DRM_XE_VM_BIND_OP_MAP, 0, 0,
+ vm->xe->pat.idx[cache_lvl]);
+ if (IS_ERR(ops)) {
+ err = PTR_ERR(ops);
+ goto release_vm_lock;
+ }
+
+ err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
+ if (err)
+ goto release_vm_lock;
+
+ xe_assert(vm->xe, !list_empty(&vops.list));
+
+ err = xe_vma_ops_alloc(&vops, false);
+ if (err)
+ goto unwind_ops;
+
+ fence = vm_bind_ioctl_ops_execute(vm, &vops);
+ if (IS_ERR(fence))
+ err = PTR_ERR(fence);
+
+unwind_ops:
+ if (err && err != -ENODATA)
+ vm_bind_ioctl_ops_unwind(vm, &ops, 1);
+
+ xe_vma_ops_fini(&vops);
+ drm_gpuva_ops_free(&vm->gpuvm, ops);
+
+release_vm_lock:
+ up_write(&vm->lock);
+
+ if (q)
+ xe_exec_queue_put(q);
+ xe_vm_put(vm);
+ xe_bo_put(bo);
+
+ if (err)
+ fence = ERR_PTR(err);
+
+ return fence;
+}
+
/**
* xe_vm_lock() - Lock the vm's dma_resv object
* @vm: The struct xe_vm whose lock is to be locked
struct ttm_buffer_object;
+struct dma_fence;
+
struct xe_exec_queue;
struct xe_file;
struct xe_sync_entry;
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
unsigned int num_fences);
+struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
+ struct xe_exec_queue *q, u64 addr,
+ enum xe_cache_level cache_lvl);
+
/**
* xe_vm_resv() - Return's the vm's reservation object
* @vm: The vm
#define XE_VM_FLAG_BANNED BIT(5)
#define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(7, 6), flags)
#define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(7, 6), (tile)->id)
+#define XE_VM_FLAG_GSC BIT(8)
unsigned long flags;
/** @composite_fence_ctx: context composite fence */