drm/xe/oa/uapi: Make OA buffer size configurable
author Sai Teja Pottumuttu <sai.teja.pottumuttu@intel.com>
Thu, 5 Dec 2024 04:19:13 +0000 (09:49 +0530)
committer Ashutosh Dixit <ashutosh.dixit@intel.com>
Tue, 10 Dec 2024 18:26:55 +0000 (10:26 -0800)
Add a new property called DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE to
allow OA buffer size to be configurable from userspace.

With this, the OA buffer size can be configured to any power-of-2
size between 128 KiB and 128 MiB. If no size is supplied, it
defaults to 16 MiB.

v2:
  - Rebase
v3:
  - Add oa buffer size to capabilities [Ashutosh]
  - Address several nitpicks [Ashutosh]
  - Fix commit message/subject [Ashutosh]

BSpec: 61100, 61228
Signed-off-by: Sai Teja Pottumuttu <sai.teja.pottumuttu@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241205041913.883767-2-sai.teja.pottumuttu@intel.com
drivers/gpu/drm/xe/regs/xe_oa_regs.h
drivers/gpu/drm/xe/xe_oa.c
drivers/gpu/drm/xe/xe_oa_types.h
drivers/gpu/drm/xe/xe_query.c
include/uapi/drm/xe_drm.h

index a9b0091cb7ee118074b64cb60b0c9a4ad46acf2b..a49561e9f3c312d8d9799d338e6aab649faf9c9b 100644 (file)
 
 #define OAG_OABUFFER           XE_REG(0xdb08)
 #define  OABUFFER_SIZE_MASK    REG_GENMASK(5, 3)
-#define  OABUFFER_SIZE_128K    REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0)
-#define  OABUFFER_SIZE_256K    REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1)
-#define  OABUFFER_SIZE_512K    REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2)
-#define  OABUFFER_SIZE_1M      REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3)
-#define  OABUFFER_SIZE_2M      REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4)
-#define  OABUFFER_SIZE_4M      REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5)
-#define  OABUFFER_SIZE_8M      REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6)
-#define  OABUFFER_SIZE_16M     REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7)
 #define  OAG_OABUFFER_MEMORY_SELECT            REG_BIT(0) /* 0: PPGTT, 1: GGTT */
 
 #define OAG_OACONTROL                          XE_REG(0xdaf4)
@@ -63,6 +55,7 @@
 #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED)
 #define  OAG_OA_DEBUG_DISABLE_MMIO_TRG                 REG_BIT(14)
 #define  OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL      REG_BIT(13)
+#define  OAG_OA_DEBUG_BUF_SIZE_SELECT                  REG_BIT(12)
 #define  OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL   REG_BIT(8)
 #define  OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL   REG_BIT(7)
 #define  OAG_OA_DEBUG_INCLUDE_CLK_RATIO                        REG_BIT(6)
index 8dd55798ab312062cceded653516cb82eca2ace4..ec88b18e9baa235a06af0898e51b7dd8939ab5b3 100644 (file)
@@ -96,6 +96,7 @@ struct xe_oa_open_param {
        struct drm_xe_sync __user *syncs_user;
        int num_syncs;
        struct xe_sync_entry *syncs;
+       size_t oa_buffer_size;
 };
 
 struct xe_oa_config_bo {
@@ -403,11 +404,19 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
 
 static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
 {
-       struct xe_mmio *mmio = &stream->gt->mmio;
        u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
-       u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT;
+       int size_exponent = __ffs(stream->oa_buffer.bo->size);
+       u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT;
+       struct xe_mmio *mmio = &stream->gt->mmio;
        unsigned long flags;
 
+       /*
+        * If oa buffer size is more than 16MB (exponent greater than 24), the
+        * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set.
+        */
+       oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK,
+               size_exponent > 24 ? size_exponent - 20 : size_exponent - 17);
+
        spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
 
        xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0);
@@ -901,15 +910,12 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
        xe_file_put(stream->xef);
 }
 
-static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream)
+static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
 {
        struct xe_bo *bo;
 
-       BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE);
-       BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M);
-
        bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
-                                 XE_OA_BUFFER_SIZE, ttm_bo_type_kernel,
+                                 size, ttm_bo_type_kernel,
                                  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
        if (IS_ERR(bo))
                return PTR_ERR(bo);
@@ -1087,6 +1093,13 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
                             0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
 }
 
+static u32 oag_buf_size_select(const struct xe_oa_stream *stream)
+{
+       return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT,
+                            stream->oa_buffer.bo->size > SZ_16M ?
+                            OAG_OA_DEBUG_BUF_SIZE_SELECT : 0);
+}
+
 static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
 {
        struct xe_mmio *mmio = &stream->gt->mmio;
@@ -1119,6 +1132,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
        xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
                        _MASKED_BIT_ENABLE(oa_debug) |
                        oag_report_ctx_switches(stream) |
+                       oag_buf_size_select(stream) |
                        oag_configure_mmio_trigger(stream, true));
 
        xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
@@ -1260,6 +1274,17 @@ static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
        return 0;
 }
 
+static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value,
+                                        struct xe_oa_open_param *param)
+{
+       if (!is_power_of_2(value) || value < SZ_128K || value > SZ_128M) {
+               drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value);
+               return -EINVAL;
+       }
+       param->oa_buffer_size = value;
+       return 0;
+}
+
 static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
                                    struct xe_oa_open_param *param)
 {
@@ -1280,6 +1305,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
        [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
        [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
        [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+       [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size,
 };
 
 static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
@@ -1294,6 +1320,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
        [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval,
        [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
        [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+       [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval,
 };
 
 static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
@@ -1553,7 +1580,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
 
 static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg)
 {
-       struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, };
+       struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, };
        void __user *uaddr = (void __user *)arg;
 
        if (copy_to_user(uaddr, &info, sizeof(info)))
@@ -1639,7 +1666,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
        }
 
        /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */
-       if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) {
+       if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) {
                drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n");
                return -EINVAL;
        }
@@ -1783,9 +1810,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
        if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
            stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
                stream->oa_buffer.circ_size =
-                       XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
+                       param->oa_buffer_size -
+                       param->oa_buffer_size % stream->oa_buffer.format->size;
        else
-               stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
+               stream->oa_buffer.circ_size = param->oa_buffer_size;
 
        if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
                /* If we don't find the context offset, just return error */
@@ -1828,7 +1856,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
                goto err_fw_put;
        }
 
-       ret = xe_oa_alloc_oa_buffer(stream);
+       ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size);
        if (ret)
                goto err_fw_put;
 
@@ -2125,6 +2153,9 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
                drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
        }
 
+       if (!param.oa_buffer_size)
+               param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE;
+
        ret = xe_oa_parse_syncs(oa, &param);
        if (ret)
                goto err_exec_q;
index fea9d981e414fadbba7d660da65a449c3d33fe61..df77939156288483dcc23b217b0e1708a9130370 100644 (file)
@@ -15,7 +15,7 @@
 #include "regs/xe_reg_defs.h"
 #include "xe_hw_engine_types.h"
 
-#define XE_OA_BUFFER_SIZE SZ_16M
+#define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M
 
 enum xe_oa_report_header {
        HDR_32_BIT = 0,
index 3eda616f15026d259b49a84d410cf049faba8284..d2a816f71bf26af50f25c3dc0cb7303a7cb2c19b 100644 (file)
@@ -671,7 +671,8 @@ static int query_oa_units(struct xe_device *xe,
                        du->oa_unit_id = u->oa_unit_id;
                        du->oa_unit_type = u->type;
                        du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
-                       du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS;
+                       du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS |
+                                          DRM_XE_OA_CAPS_OA_BUFFER_SIZE;
 
                        j = 0;
                        for_each_hw_engine(hwe, gt, hwe_id) {
index 4a8a4a63e99ca8fd84429fcdd292a503cfe9de20..0383b52cbd8699869f76b279019ffce530e1df76 100644 (file)
@@ -1486,6 +1486,7 @@ struct drm_xe_oa_unit {
        __u64 capabilities;
 #define DRM_XE_OA_CAPS_BASE            (1 << 0)
 #define DRM_XE_OA_CAPS_SYNCS           (1 << 1)
+#define DRM_XE_OA_CAPS_OA_BUFFER_SIZE  (1 << 2)
 
        /** @oa_timestamp_freq: OA timestamp freq */
        __u64 oa_timestamp_freq;
@@ -1651,6 +1652,14 @@ enum drm_xe_oa_property_id {
         * to the VM bind case.
         */
        DRM_XE_OA_PROPERTY_SYNCS,
+
+       /**
+        * @DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE: Size of OA buffer to be
+        * allocated by the driver in bytes. Supported sizes are powers of
+        * 2 from 128 KiB to 128 MiB. When not specified, a 16 MiB OA
+        * buffer is allocated by default.
+        */
+       DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE,
 };
 
 /**