#include <linux/poll.h>
#include <linux/types.h>
+#include <drm/drm_drv.h>
#include <uapi/drm/xe_drm.h>
+#include "xe_bo.h"
#include "xe_device.h"
#include "xe_eu_stall.h"
+#include "xe_force_wake.h"
+#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_macros.h"
#include "xe_observation.h"
+#include "xe_pm.h"
+
+#include "regs/xe_eu_stall_regs.h"
+#include "regs/xe_gt_regs.h"
+
+static size_t per_xecore_buf_size = SZ_512K;
+
+struct per_xecore_buf {
+ /* Buffer vaddr */
+ u8 *vaddr;
+ /* Write pointer */
+ u32 write;
+ /* Read pointer */
+ u32 read;
+};
+
+struct xe_eu_stall_data_stream {
+ bool enabled;
+ int wait_num_reports;
+ int sampling_rate_mult;
+ size_t data_record_size;
+ size_t per_xecore_buf_size;
+
+ struct xe_gt *gt;
+ struct xe_bo *bo;
+ struct per_xecore_buf *xecore_buf;
+};
+
+struct xe_eu_stall_gt {
+ /* Lock to protect stream */
+ struct mutex stream_lock;
+ /* EU stall data stream */
+ struct xe_eu_stall_data_stream *stream;
+};
/**
* struct eu_stall_open_properties - EU stall sampling properties received
struct xe_gt *gt;
};
+/*
+ * EU stall data format for PVC
+ */
+struct xe_eu_stall_data_pvc {
+ __u64 ip_addr:29; /* Bits 0 to 28 */
+ __u64 active_count:8; /* Bits 29 to 36 */
+ __u64 other_count:8; /* Bits 37 to 44 */
+ __u64 control_count:8; /* Bits 45 to 52 */
+ __u64 pipestall_count:8; /* Bits 53 to 60 */
+ __u64 send_count:8; /* Bits 61 to 68 */
+ __u64 dist_acc_count:8; /* Bits 69 to 76 */
+ __u64 sbid_count:8; /* Bits 77 to 84 */
+ __u64 sync_count:8; /* Bits 85 to 92 */
+ __u64 inst_fetch_count:8; /* Bits 93 to 100 */
+ __u64 unused_bits:27;
+ __u64 unused[6];
+} __packed;
+
+static size_t xe_eu_stall_data_record_size(struct xe_device *xe)
+{
+ size_t record_size = 0;
+
+ if (xe->info.platform == XE_PVC)
+ record_size = sizeof(struct xe_eu_stall_data_pvc);
+
+ xe_assert(xe, is_power_of_2(record_size));
+
+ return record_size;
+}
+
+/**
+ * num_data_rows - Return the number of EU stall data rows of 64B each
+ * for a given data size.
+ *
+ * @data_size: EU stall data size
+ */
+static u32 num_data_rows(u32 data_size)
+{
+ return data_size >> 6;
+}
+
+static void xe_eu_stall_fini(void *arg)
+{
+ struct xe_gt *gt = arg;
+
+ mutex_destroy(>->eu_stall->stream_lock);
+ kfree(gt->eu_stall);
+}
+
+/**
+ * xe_eu_stall_init() - Allocate and initialize GT level EU stall data
+ * structure xe_eu_stall_gt within struct xe_gt.
+ *
+ * @gt: GT being initialized.
+ *
+ * Returns: zero on success or a negative error code.
+ */
+int xe_eu_stall_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int ret;
+
+ gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL);
+ if (!gt->eu_stall) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ mutex_init(>->eu_stall->stream_lock);
+
+ ret = devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
+ if (ret)
+ goto exit_free;
+
+ return 0;
+exit_free:
+ mutex_destroy(>->eu_stall->stream_lock);
+ kfree(gt->eu_stall);
+exit:
+ return ret;
+}
+
static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value,
struct eu_stall_open_properties *props)
{
return ret;
}
+static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
+{
+ struct xe_gt *gt = stream->gt;
+
+ gt->eu_stall->stream = NULL;
+ kfree(stream);
+}
+
+static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream)
+{
+ xe_bo_unpin_map_no_vm(stream->bo);
+ kfree(stream->xecore_buf);
+}
+
+static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
+ u16 last_xecore)
+{
+ struct xe_tile *tile = stream->gt->tile;
+ struct xe_bo *bo;
+ u32 size;
+
+ stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
+ if (!stream->xecore_buf)
+ return -ENOMEM;
+
+ size = stream->per_xecore_buf_size * last_xecore;
+
+ bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
+ size, ~0ull, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
+ if (IS_ERR(bo)) {
+ kfree(stream->xecore_buf);
+ return PTR_ERR(bo);
+ }
+
+ XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
+ stream->bo = bo;
+
+ return 0;
+}
+
+static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
+{
+ u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value;
+ struct per_xecore_buf *xecore_buf;
+ struct xe_gt *gt = stream->gt;
+ u16 group, instance;
+ unsigned int fw_ref;
+ int xecore;
+
+ /* Take runtime pm ref and forcewake to disable RC6 */
+ xe_pm_runtime_get(gt_to_xe(gt));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
+ xe_gt_err(gt, "Failed to get RENDER forcewake\n");
+ xe_pm_runtime_put(gt_to_xe(gt));
+ return -ETIMEDOUT;
+ }
+
+ for_each_dss_steering(xecore, gt, group, instance) {
+ write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance);
+ write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
+ read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
+ read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
+ /* Initialize the read pointer to the write pointer */
+ xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
+ write_ptr <<= 6;
+ write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
+ xecore_buf = &stream->xecore_buf[xecore];
+ xecore_buf->write = write_ptr;
+ xecore_buf->read = write_ptr;
+ }
+ reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE,
+ REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) |
+ REG_FIELD_PREP(EUSTALL_SAMPLE_RATE,
+ stream->sampling_rate_mult));
+ xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value);
+ /* GGTT addresses can never be > 32 bits */
+ xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0);
+ reg_value = xe_bo_ggtt_addr(stream->bo);
+ reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ,
+ stream->per_xecore_buf_size / SZ_256K);
+ reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING;
+ xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value);
+
+ return 0;
+}
+
+static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
+ struct eu_stall_open_properties *props)
+{
+ unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores;
+ struct per_xecore_buf *xecore_buf;
+ struct xe_gt *gt = stream->gt;
+ xe_dss_mask_t all_xecores;
+ u16 group, instance;
+ u32 vaddr_offset;
+ int ret;
+
+ bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
+ XE_MAX_DSS_FUSE_BITS);
+ num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS);
+ last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1;
+
+ max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores);
+ if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) {
+ xe_gt_dbg(gt, "Invalid EU stall event report count %u\n",
+ props->wait_num_reports);
+ xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n",
+ max_wait_num_reports);
+ return -EINVAL;
+ }
+ stream->per_xecore_buf_size = per_xecore_buf_size;
+ stream->sampling_rate_mult = props->sampling_rate_mult;
+ stream->wait_num_reports = props->wait_num_reports;
+ stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));
+
+ ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
+ if (ret)
+ return ret;
+
+ for_each_dss_steering(xecore, gt, group, instance) {
+ xecore_buf = &stream->xecore_buf[xecore];
+ vaddr_offset = xecore * stream->per_xecore_buf_size;
+ xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset;
+ }
+ return 0;
+}
+
static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
{
__poll_t ret = 0;
return ret;
}
-static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
+{
+ int ret = 0;
+
+ if (stream->enabled)
+ return ret;
+
+ stream->enabled = true;
+
+ ret = xe_eu_stall_stream_enable(stream);
+ return ret;
+}
+
+static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
{
+ struct xe_gt *gt = stream->gt;
+
+ if (!stream->enabled)
+ return 0;
+
+ stream->enabled = false;
+
+ xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
+ xe_pm_runtime_put(gt_to_xe(gt));
+
return 0;
}
+static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream,
+ unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case DRM_XE_OBSERVATION_IOCTL_ENABLE:
+ return xe_eu_stall_enable_locked(stream);
+ case DRM_XE_OBSERVATION_IOCTL_DISABLE:
+ return xe_eu_stall_disable_locked(stream);
+ }
+
+ return -EINVAL;
+}
+
+static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct xe_eu_stall_data_stream *stream = file->private_data;
+ struct xe_gt *gt = stream->gt;
+ long ret;
+
+ mutex_lock(>->eu_stall->stream_lock);
+ ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg);
+ mutex_unlock(>->eu_stall->stream_lock);
+
+ return ret;
+}
+
static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
{
+ struct xe_eu_stall_data_stream *stream = file->private_data;
+ struct xe_gt *gt = stream->gt;
+
+ drm_dev_put(>->tile->xe->drm);
+
+ mutex_lock(>->eu_stall->stream_lock);
+ xe_eu_stall_disable_locked(stream);
+ xe_eu_stall_data_buf_destroy(stream);
+ xe_eu_stall_stream_free(stream);
+ mutex_unlock(>->eu_stall->stream_lock);
+
return 0;
}
static inline bool has_eu_stall_sampling_support(struct xe_device *xe)
{
- return false;
+ return xe->info.platform == XE_PVC;
+}
+
+static int xe_eu_stall_stream_open_locked(struct drm_device *dev,
+ struct eu_stall_open_properties *props,
+ struct drm_file *file)
+{
+ struct xe_eu_stall_data_stream *stream;
+ struct xe_gt *gt = props->gt;
+ unsigned long f_flags = 0;
+ int ret, stream_fd;
+
+ /* Only one session can be active at any time */
+ if (gt->eu_stall->stream) {
+ xe_gt_dbg(gt, "EU stall sampling session already active\n");
+ return -EBUSY;
+ }
+
+ stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+ if (!stream)
+ return -ENOMEM;
+
+ gt->eu_stall->stream = stream;
+ stream->gt = gt;
+
+ ret = xe_eu_stall_stream_init(stream, props);
+ if (ret) {
+ xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret);
+ goto err_free;
+ }
+
+ stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags);
+ if (stream_fd < 0) {
+ ret = stream_fd;
+ xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret);
+ goto err_destroy;
+ }
+
+ /* Take a reference on the driver that will be kept with stream_fd
+ * until its release.
+ */
+ drm_dev_get(>->tile->xe->drm);
+
+ return stream_fd;
+
+err_destroy:
+ xe_eu_stall_data_buf_destroy(stream);
+err_free:
+ xe_eu_stall_stream_free(stream);
+ return ret;
}
/**
{
struct xe_device *xe = to_xe_device(dev);
struct eu_stall_open_properties props = {};
- int ret, stream_fd;
+ int ret;
if (!has_eu_stall_sampling_support(xe)) {
drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
return -EACCES;
}
+ /* Initialize and set default values */
+ props.wait_num_reports = 1;
+ props.sampling_rate_mult = 4;
+
ret = xe_eu_stall_user_extensions(xe, data, 0, &props);
if (ret)
return ret;
return -EINVAL;
}
- stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, NULL, 0);
- if (stream_fd < 0)
- xe_gt_dbg(props.gt, "EU stall inode get fd failed : %d\n", stream_fd);
+ mutex_lock(&props.gt->eu_stall->stream_lock);
+ ret = xe_eu_stall_stream_open_locked(dev, &props, file);
+ mutex_unlock(&props.gt->eu_stall->stream_lock);
- return stream_fd;
+ return ret;
}