struct i915_gem_proto_context *pc,
const struct drm_i915_gem_context_param *args)
{
- struct drm_i915_private *i915 = fpriv->dev_priv;
+ struct drm_i915_private *i915 = fpriv->i915;
struct i915_address_space *vm;
if (args->size)
struct i915_gem_proto_context *pc,
const struct drm_i915_gem_context_param *args)
{
- struct drm_i915_private *i915 = fpriv->dev_priv;
+ struct drm_i915_private *i915 = fpriv->i915;
struct set_proto_ctx_engines set = { .i915 = i915 };
struct i915_context_param_engines __user *user =
u64_to_user_ptr(args->value);
struct i915_gem_proto_context *pc,
struct drm_i915_gem_context_param *args)
{
- struct drm_i915_private *i915 = fpriv->dev_priv;
+ struct drm_i915_private *i915 = fpriv->i915;
struct drm_i915_gem_context_param_sseu user_sseu;
struct intel_sseu *sseu;
int ret;
break;
case I915_CONTEXT_PARAM_PRIORITY:
- ret = validate_priority(fpriv->dev_priv, args);
+ ret = validate_priority(fpriv->i915, args);
if (!ret)
pc->sched.priority = args->value;
break;
if (args->size)
ret = -EINVAL;
else
- ret = proto_context_set_persistence(fpriv->dev_priv, pc,
+ ret = proto_context_set_persistence(fpriv->i915, pc,
args->value);
break;
case I915_CONTEXT_PARAM_PROTECTED_CONTENT:
- ret = proto_context_set_protected(fpriv->dev_priv, pc,
+ ret = proto_context_set_protected(fpriv->i915, pc,
args->value);
break;
unsigned long idx;
xa_for_each(&file_priv->proto_context_xa, idx, pc)
- proto_context_close(file_priv->dev_priv, pc);
+ proto_context_close(file_priv->i915, pc);
xa_destroy(&file_priv->proto_context_xa);
mutex_destroy(&file_priv->proto_context_lock);
lockdep_assert_held(&file_priv->proto_context_lock);
- ctx = i915_gem_create_context(file_priv->dev_priv, pc);
+ ctx = i915_gem_create_context(file_priv->i915, pc);
if (IS_ERR(ctx))
return ctx;
old = xa_erase(&file_priv->proto_context_xa, id);
GEM_BUG_ON(old != pc);
- proto_context_close(file_priv->dev_priv, pc);
+ proto_context_close(file_priv->i915, pc);
return ctx;
}
GEM_WARN_ON(ctx && pc);
if (pc)
- proto_context_close(file_priv->dev_priv, pc);
+ proto_context_close(file_priv->i915, pc);
if (ctx)
context_close(ctx);
* GEM_CONTEXT_CREATE starting with graphics
* version 13.
*/
- WARN_ON(GRAPHICS_VER(file_priv->dev_priv) > 12);
+ WARN_ON(GRAPHICS_VER(file_priv->i915) > 12);
ret = set_proto_ctx_param(file_priv, pc, args);
} else {
ret = -ENOENT;
}
/**
- * Creates a new object using the same path as DRM_I915_GEM_CREATE_EXT
+ * __i915_gem_object_create_user - Creates a new object using the same path as
+ * DRM_I915_GEM_CREATE_EXT
* @i915: i915 private
* @size: size of the buffer, in bytes
* @placements: possible placement regions, in priority order
}
/**
- * Creates a new mm object and returns a handle to it.
+ * i915_gem_create_ioctl - Creates a new mm object and returns a handle to it.
* @dev: drm device pointer
* @data: ioctl data blob
* @file: drm file pointer
};
/**
- * Creates a new mm object and returns a handle to it.
+ * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it.
* @dev: drm device pointer
* @data: ioctl data blob
* @file: drm file pointer
}
/**
- * Moves a single object to the WC read, and possibly write domain.
+ * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
+ * possibly write domain.
* @obj: object to act on
* @write: ask for write access or read only
*
}
/**
- * Moves a single object to the GTT read, and possibly write domain.
+ * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
+ * and possibly write domain.
* @obj: object to act on
* @write: ask for write access or read only
*
}
/**
- * Changes the cache-level of an object across all VMA.
+ * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
* @obj: object to act on
* @cache_level: new cache level to set for the object
*
}
/**
- * Moves a single object to the CPU read, and possibly write domain.
+ * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
+ * and possibly write domain.
* @obj: object to act on
* @write: requesting write or read-only access
*
}
/**
- * Called when user space prepares to use an object with the CPU, either
+ * i915_gem_set_domain_ioctl - Called when user space prepares to use an
+ * object with the CPU, either
* through the mmap ioctl's mapping or a GTT mapping.
* @dev: drm device
* @data: ioctl data blob
return err;
}
-static int num_vcs_engines(struct drm_i915_private *i915)
-{
- return hweight_long(VDBOX_MASK(to_gt(i915)));
-}
-
/*
* Find one BSD ring to dispatch the corresponding BSD command.
* The engine index is returned.
/* Check whether the file_priv has already selected one ring. */
if ((int)file_priv->bsd_engine < 0)
file_priv->bsd_engine =
- get_random_u32_below(num_vcs_engines(dev_priv));
+ get_random_u32_below(dev_priv->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
return file_priv->bsd_engine;
}
return -1;
}
- if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
+ if (user_ring_id == I915_EXEC_BSD &&
+ i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO] > 1) {
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
memcpy(map, data, size);
- i915_gem_object_unpin_map(obj);
+ i915_gem_object_flush_map(obj);
+ __i915_gem_object_release_map(obj);
return obj;
}
return ret < 0 ? ret : 0;
}
-/**
+/*
* i915_gem_object_has_unknown_state - Return true if the object backing pages are
* in an unknown_state. This means that userspace must NEVER be allowed to touch
* the pages, with either the GPU or CPU.
}
}
-/**
+/*
* __i915_gem_ttm_object_init - Initialize a ttm-backed i915 gem object
* @mem: The initial memory region for the object.
* @obj: The gem object.
* @_src_iter: Storage space for the source kmap iterator.
* @dst_iter: Pointer to the destination kmap iterator.
* @src_iter: Pointer to the source kmap iterator.
+ * @num_pages: Number of pages
* @clear: Whether to clear instead of copy.
* @src_rsgt: Refcounted scatter-gather list of source memory.
* @dst_rsgt: Refcounted scatter-gather list of destination memory.
* i915_ttm_move - The TTM move callback used by i915.
* @bo: The buffer object.
* @evict: Whether this is an eviction.
+ * @ctx: Pointer to a struct ttm_operation_ctx indicating how the waits should be
+ * performed if waiting
* @dst_mem: The destination ttm resource.
* @hop: If we need multihop, what temporary memory type to move to.
*
/**
* i915_ttm_backup_region - Back up all objects of a region to smem.
* @mr: The memory region
- * @allow_gpu: Whether to allow the gpu blitter for this backup.
- * @backup_pinned: Backup also pinned objects.
+ * @flags: TTM backup flags
*
* Loops over all objects of a region and either evicts them if they are
* evictable or backs them up using a backup object if they are pinned.
/**
* i915_ttm_restore_region - Restore backed-up objects of a region from smem.
* @mr: The memory region
- * @allow_gpu: Whether to allow the gpu blitter to recover.
+ * @flags: TTM backup flags
*
* Loops over all objects of a region and if they are backed-up, restores
* them from smem.
}
/**
- * Waits for rendering to the object to be completed
+ * i915_gem_object_wait - Waits for rendering to the object to be completed
* @obj: i915 gem object
* @flags: how to wait (under a lock, for all rendering or just for writes etc)
* @timeout: how long to wait
#include "i915_drv.h"
#include "intel_context_types.h"
#include "intel_engine_types.h"
+#include "intel_gt_pm.h"
#include "intel_ring_types.h"
#include "intel_timeline_types.h"
#include "i915_trace.h"
static inline void intel_context_enter(struct intel_context *ce)
{
lockdep_assert_held(&ce->timeline->mutex);
- if (!ce->active_count++)
- ce->ops->enter(ce);
+ if (ce->active_count++)
+ return;
+
+ ce->ops->enter(ce);
+ intel_gt_pm_get(ce->vm->gt);
}
static inline void intel_context_mark_active(struct intel_context *ce)
{
lockdep_assert_held(&ce->timeline->mutex);
GEM_BUG_ON(!ce->active_count);
- if (!--ce->active_count)
- ce->ops->exit(ce);
+ if (--ce->active_count)
+ return;
+
+ intel_gt_pm_put_async(ce->vm->gt);
+ ce->ops->exit(ce);
}
static inline struct intel_context *intel_context_get(struct intel_context *ce)
&kernel, "kernel_context");
}
-/**
- * intel_engines_init_common - initialize cengine state which might require hw access
+/*
+ * engine_init_common - initialize engine state which might require hw access
* @engine: Engine to initialize.
*
* Initializes @engine@ structure members shared between legacy and execlists
ENGINE_TRACE(engine, "parking\n");
+ /*
+ * Open coded one half of intel_context_enter, which we have to omit
+ * here (see the large comment below) and because the other part must
+ * not be called due constructing directly with __i915_request_create
+ * which increments active count via intel_context_mark_active.
+ */
+ GEM_BUG_ON(rq->context->active_count != 1);
+ __intel_gt_pm_get(engine->gt);
+
/*
* We have to serialise all potential retirement paths with our
* submission, as we don't want to underflow either the
struct intel_ring;
struct intel_uncore;
struct intel_breadcrumbs;
+struct intel_engine_cs;
+struct i915_perf_group;
typedef u32 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)
} props, defaults;
I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
+
+ /*
+ * The perf group maps to one OA unit which controls one OA buffer. All
+ * reports corresponding to this engine will be reported to this OA
+ * buffer. An engine will map to a single OA unit, but a single OA unit
+ * can generate reports for multiple engines.
+ */
+ struct i915_perf_group *oa_group;
};
static inline bool
* inspecting the queue to see if we need to resumbit.
*/
if (*prev != *execlists->active) { /* elide lite-restores */
+ struct intel_context *prev_ce = NULL, *active_ce = NULL;
+
/*
* Note the inherent discrepancy between the HW runtime,
* recorded as part of the context switch, and the CPU
* and correct overselves later when updating from HW.
*/
if (*prev)
- lrc_runtime_stop((*prev)->context);
+ prev_ce = (*prev)->context;
if (*execlists->active)
- lrc_runtime_start((*execlists->active)->context);
+ active_ce = (*execlists->active)->context;
+ if (prev_ce != active_ce) {
+ if (prev_ce)
+ lrc_runtime_stop(prev_ce);
+ if (active_ce)
+ lrc_runtime_start(active_ce);
+ }
new_timeslice(execlists);
}
ggtt->invalidate(ggtt);
}
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+ const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+ gen8_pte_t __iomem *gtt_base =
+ (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+ int i;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ for (i = 0; i < num_entries; i++)
+ gen8_set_pte(>t_base[i], scratch_pte);
+}
+
static void gen6_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
* paths, and we trust that 0 will remain reserved. However,
* the only likely reason for failure to insert is a driver
* bug, which we expect to cause other failures...
+ *
+ * Since CPU can perform speculative reads on error capture
+ * (write-combining allows it) add scratch page after error
+ * capture to avoid DMAR errors.
*/
- ggtt->error_capture.size = I915_GTT_PAGE_SIZE;
+ ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE;
ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
drm_mm_insert_node_in_range(&ggtt->vm.mm,
0, ggtt->mappable_end,
DRM_MM_INSERT_LOW);
}
- if (drm_mm_node_allocated(&ggtt->error_capture))
+ if (drm_mm_node_allocated(&ggtt->error_capture)) {
+ u64 start = ggtt->error_capture.start;
+ u64 size = ggtt->error_capture.size;
+
+ ggtt->vm.scratch_range(&ggtt->vm, start, size);
drm_dbg(&ggtt->vm.i915->drm,
"Reserved GGTT:[%llx, %llx] for use by error capture\n",
- ggtt->error_capture.start,
- ggtt->error_capture.start + ggtt->error_capture.size);
+ start, start + size);
+ }
/*
* The upper portion of the GuC address space has a sizeable hole
ggtt->vm.cleanup = gen6_gmch_remove;
ggtt->vm.insert_page = gen8_ggtt_insert_page;
ggtt->vm.clear_range = nop_clear_range;
+ ggtt->vm.scratch_range = gen8_ggtt_clear_range;
ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
ggtt->vm.clear_range = nop_clear_range;
if (!HAS_FULL_PPGTT(i915))
ggtt->vm.clear_range = gen6_ggtt_clear_range;
+ ggtt->vm.scratch_range = gen6_ggtt_clear_range;
ggtt->vm.insert_page = gen6_ggtt_insert_page;
ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
ggtt->vm.cleanup = gen6_gmch_remove;
flush = i915_ggtt_resume_vm(&ggtt->vm);
+ if (drm_mm_node_allocated(&ggtt->error_capture))
+ ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
+ ggtt->error_capture.size);
+
ggtt->invalidate(ggtt);
if (flush)
ggtt->vm.insert_page = gmch_ggtt_insert_page;
ggtt->vm.insert_entries = gmch_ggtt_insert_entries;
ggtt->vm.clear_range = gmch_ggtt_clear_range;
+ ggtt->vm.scratch_range = gmch_ggtt_clear_range;
ggtt->vm.cleanup = gmch_ggtt_remove;
ggtt->invalidate = gmch_ggtt_invalidate;
void intel_gt_debugfs_register(struct intel_gt *gt)
{
struct dentry *root;
+ char gtname[4];
if (!gt->i915->drm.primary->debugfs_root)
return;
- root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root);
+ snprintf(gtname, sizeof(gtname), "gt%u", gt->info.id);
+ root = debugfs_create_dir(gtname, gt->i915->drm.primary->debugfs_root);
if (IS_ERR(root))
return;
#define ENABLE_SMALLPL REG_BIT(15)
#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
#define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5)
+#define MTL_DISABLE_SAMPLER_SC_OOO REG_BIT(3)
#define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194)
#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15)
#define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10)
#define DISABLE_ECC REG_BIT(5)
#define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4)
+/*
+ * We have both ENABLE and DISABLE defines below using the same bit because the
+ * meaning depends on the target platform. There are no platform prefix for them
+ * because different steppings of DG2 pick one or the other semantics.
+ */
#define ENABLE_PREFETCH_INTO_IC REG_BIT(3)
+#define DISABLE_PREFETCH_INTO_IC REG_BIT(3)
#define EU_PERF_CNTL0 PERF_REG(0xe458)
#define EU_PERF_CNTL4 PERF_REG(0xe45c)
#define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
#define HSW_ROW_CHICKEN3 _MMIO(0xe49c)
+#define GEN9_ROW_CHICKEN3 MCR_REG(0xe49c)
#define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6)
+#define MTL_DISABLE_FIX_FOR_EOT_FLUSH REG_BIT(9)
#define GEN8_ROW_CHICKEN MCR_REG(0xe4f0)
#define FLOW_CONTROL_ENABLE REG_BIT(15)
u64 start, u64 length);
void (*clear_range)(struct i915_address_space *vm,
u64 start, u64 length);
+ void (*scratch_range)(struct i915_address_space *vm,
+ u64 start, u64 length);
void (*insert_page)(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
}
+bool intel_check_bios_c6_setup(struct intel_rc6 *rc6)
+{
+ if (!rc6->bios_state_captured) {
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm(uncore->rpm, wakeref)
+ rc6->bios_rc_state = intel_uncore_read(uncore, GEN6_RC_STATE);
+
+ rc6->bios_state_captured = true;
+ }
+
+ return rc6->bios_rc_state & RC_SW_TARGET_STATE_MASK;
+}
+
static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
{
struct intel_uncore *uncore = rc6_to_uncore(rc6);
return false;
}
+ if (IS_METEORLAKE(gt->i915) &&
+ !intel_check_bios_c6_setup(rc6)) {
+ drm_notice(&i915->drm,
+ "C6 disabled by BIOS\n");
+ return false;
+ }
+
if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) &&
gt->type == GT_MEDIA) {
drm_notice(&i915->drm,
void intel_rc6_fini(struct intel_rc6 *rc6)
{
struct drm_i915_gem_object *pctx;
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
intel_rc6_disable(rc6);
+ /* We want the BIOS C6 state preserved across loads for MTL */
+ if (IS_METEORLAKE(rc6_to_i915(rc6)) && rc6->bios_state_captured)
+ set(uncore, GEN6_RC_STATE, rc6->bios_rc_state);
+
pctx = fetch_and_zero(&rc6->pctx);
if (pctx)
i915_gem_object_put(pctx);
void intel_rc6_print_residency(struct seq_file *m, const char *title,
enum intel_rc6_res_type id);
+bool intel_check_bios_c6_setup(struct intel_rc6 *rc6);
+
#endif /* INTEL_RC6_H */
u64 cur_residency[INTEL_RC6_RES_MAX];
u32 ctl_enable;
+ u32 bios_rc_state;
struct drm_i915_gem_object *pctx;
bool enabled : 1;
bool manual : 1;
bool wakeref : 1;
+ bool bios_state_captured : 1;
};
#endif /* INTEL_RC6_TYPES_H */
struct resource *root_res;
resource_size_t rebar_size;
resource_size_t current_size;
+ intel_wakeref_t wakeref;
u32 pci_cmd;
int i;
return;
}
- /* First disable PCI memory decoding references */
- pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
- pci_write_config_dword(pdev, PCI_COMMAND,
- pci_cmd & ~PCI_COMMAND_MEMORY);
+ /*
+ * Releasing forcewake during BAR resizing results in later forcewake
+ * ack timeouts and former can happen any time - it is asynchronous.
+ * Grabbing all forcewakes prevents it.
+ */
+ with_intel_runtime_pm(i915->uncore.rpm, wakeref) {
+ intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
- _resize_bar(i915, GEN12_LMEM_BAR, rebar_size);
+ /* First disable PCI memory decoding references */
+ pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
+ pci_write_config_dword(pdev, PCI_COMMAND,
+ pci_cmd & ~PCI_COMMAND_MEMORY);
- pci_assign_unassigned_bus_resources(pdev->bus);
- pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+ _resize_bar(i915, GEN12_LMEM_BAR, rebar_size);
+
+ pci_assign_unassigned_bus_resources(pdev->bus);
+ pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+ intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+ }
}
#else
static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) {}
#include "gt/intel_gt_regs.h"
+#include "gt/uc/intel_gsc_fw.h"
+
#include "i915_drv.h"
#include "i915_file_private.h"
#include "i915_gpu_error.h"
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
struct intel_uncore *uncore = gt->uncore;
- int loops = 2;
+ int loops;
int err;
+ /*
+ * On some platforms, e.g. Jasperlake, we see that the engine register
+ * state is not cleared until shortly after GDRST reports completion,
+ * causing a failure as we try to immediately resume while the internal
+ * state is still in flux. If we immediately repeat the reset, the
+ * second reset appears to serialise with the first, and since it is a
+ * no-op, the registers should retain their reset value. However, there
+ * is still a concern that upon leaving the second reset, the internal
+ * engine state is still in flux and not ready for resuming.
+ *
+ * Starting on MTL, there are some prep steps that we need to do when
+ * resetting some engines that need to be applied every time we write to
+ * GEN6_GDRST. As those are time consuming (tens of ms), we don't want
+ * to perform that twice, so, since the Jasperlake issue hasn't been
+ * observed on MTL, we avoid repeating the reset on newer platforms.
+ */
+ loops = GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70) ? 2 : 1;
+
/*
* GEN6_GDRST is not in the gt power well, no need to check
* for fifo space for the write or forcewake the chip for
do {
intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
- /*
- * Wait for the device to ack the reset requests.
- *
- * On some platforms, e.g. Jasperlake, we see that the
- * engine register state is not cleared until shortly after
- * GDRST reports completion, causing a failure as we try
- * to immediately resume while the internal state is still
- * in flux. If we immediately repeat the reset, the second
- * reset appears to serialise with the first, and since
- * it is a no-op, the registers should retain their reset
- * value. However, there is still a concern that upon
- * leaving the second reset, the internal engine state
- * is still in flux and not ready for resuming.
- */
+ /* Wait for the device to ack the reset requests. */
err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
hw_domain_mask, 0,
2000, 0,
return NULL;
}
+static int __reset_guc(struct intel_gt *gt)
+{
+ u32 guc_domain =
+ GRAPHICS_VER(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
+
+ return gen6_hw_domain_reset(gt, guc_domain);
+}
+
+static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+{
+ if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0))
+ return false;
+
+ if (!__HAS_ENGINE(engine_mask, GSC0))
+ return false;
+
+ return intel_gsc_uc_fw_init_done(>->uc.gsc);
+}
+
+static intel_engine_mask_t
+wa_14015076503_start(struct intel_gt *gt, intel_engine_mask_t engine_mask, bool first)
+{
+ if (!needs_wa_14015076503(gt, engine_mask))
+ return engine_mask;
+
+ /*
+ * wa_14015076503: if the GSC FW is loaded, we need to alert it that
+ * we're going to do a GSC engine reset and then wait for 200ms for the
+ * FW to get ready for it. However, if this is the first ALL_ENGINES
+ * reset attempt and the GSC is not busy, we can try to instead reset
+ * the GuC and all the other engines individually to avoid the 200ms
+ * wait.
+ * Skipping the GSC engine is safe because, differently from other
+ * engines, the GSCCS only role is to forward the commands to the GSC
+ * FW, so it doesn't have any HW outside of the CS itself and therefore
+ * it has no state that we don't explicitly re-init on resume or on
+ * context switch LRC or power context). The HW for the GSC uC is
+ * managed by the GSC FW so we don't need to care about that.
+ */
+ if (engine_mask == ALL_ENGINES && first && intel_engine_is_idle(gt->engine[GSC0])) {
+ __reset_guc(gt);
+ engine_mask = gt->info.engine_mask & ~BIT(GSC0);
+ } else {
+ intel_uncore_rmw(gt->uncore,
+ HECI_H_GS1(MTL_GSC_HECI2_BASE),
+ 0, HECI_H_GS1_ER_PREP);
+
+ /* make sure the reset bit is clear when writing the CSR reg */
+ intel_uncore_rmw(gt->uncore,
+ HECI_H_CSR(MTL_GSC_HECI2_BASE),
+ HECI_H_CSR_RST, HECI_H_CSR_IG);
+ msleep(200);
+ }
+
+ return engine_mask;
+}
+
+static void
+wa_14015076503_end(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+{
+ if (!needs_wa_14015076503(gt, engine_mask))
+ return;
+
+ intel_uncore_rmw(gt->uncore,
+ HECI_H_GS1(MTL_GSC_HECI2_BASE),
+ HECI_H_GS1_ER_PREP, 0);
+}
+
int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
*/
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
- GT_TRACE(gt, "engine_mask=%x\n", engine_mask);
+ intel_engine_mask_t reset_mask;
+
+ reset_mask = wa_14015076503_start(gt, engine_mask, !retry);
+
+ GT_TRACE(gt, "engine_mask=%x\n", reset_mask);
preempt_disable();
- ret = reset(gt, engine_mask, retry);
+ ret = reset(gt, reset_mask, retry);
preempt_enable();
+
+ wa_14015076503_end(gt, reset_mask);
}
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
int intel_reset_guc(struct intel_gt *gt)
{
- u32 guc_domain =
- GRAPHICS_VER(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
int ret;
GEM_BUG_ON(!HAS_GT_UC(gt->i915));
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
- ret = gen6_hw_domain_reset(gt, guc_domain);
+ ret = __reset_guc(gt);
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
return ret;
rps_disable_interrupts(rps);
}
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
-{
- struct drm_i915_private *i915 = rps_to_i915(rps);
- i915_reg_t rpstat;
-
- rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
-
- return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
-}
-
u32 intel_rps_read_rpstat(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
}
-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
+static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
return cagf;
}
-static u32 read_cagf(struct intel_rps *rps)
+static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+ i915_reg_t r = INVALID_MMIO_REG;
u32 freq;
/*
* registers will return 0 freq when GT is in RC6
*/
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
- freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+ r = MTL_MIRROR_TARGET_WP1;
} else if (GRAPHICS_VER(i915) >= 12) {
- freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+ r = GEN12_RPSTAT1;
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
} else if (GRAPHICS_VER(i915) >= 6) {
- freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
+ r = GEN6_RPSTAT1;
} else {
- freq = intel_uncore_read(uncore, MEMSTAT_ILK);
+ r = MEMSTAT_ILK;
}
+ if (i915_mmio_reg_valid(r))
+ freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r);
+
return intel_rps_get_cagf(rps, freq);
}
+static u32 read_cagf(struct intel_rps *rps)
+{
+ return __read_cagf(rps, true);
+}
+
u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
return freq;
}
-u32 intel_rps_read_punit_req(struct intel_rps *rps)
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
+{
+ return intel_gpu_freq(rps, __read_cagf(rps, false));
+}
+
+static u32 intel_rps_read_punit_req(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
static struct drm_i915_private __rcu *ips_mchdev;
-/**
+/*
* Tells the intel_ips driver that the i915 driver is now loaded, if
* IPS got loaded first.
*
int intel_gpu_freq(struct intel_rps *rps, int val);
int intel_freq_opcode(struct intel_rps *rps, int val);
-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps);
u32 intel_rps_get_requested_frequency(struct intel_rps *rps);
u32 intel_rps_get_min_frequency(struct intel_rps *rps);
u32 intel_rps_get_min_raw_freq(struct intel_rps *rps);
u32 intel_rps_get_rp0_frequency(struct intel_rps *rps);
u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
-u32 intel_rps_read_punit_req(struct intel_rps *rps);
u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
u32 intel_rps_read_rpstat(struct intel_rps *rps);
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps);
void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps);
void intel_rps_raise_unslice(struct intel_rps *rps);
void intel_rps_lower_unslice(struct intel_rps *rps);
#include <linux/string_helpers.h>
#include "i915_drv.h"
+#include "i915_perf_types.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"
* If i915/perf is active, we want a stable powergating configuration
* on the system. Use the configuration pinned by i915/perf.
*/
- if (gt->perf.exclusive_stream)
+ if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
req_sseu = >->perf.sseu;
slices = hweight8(req_sseu->slice_mask);
add_render_compute_tuning_settings(i915, wal);
+ if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) ||
+ IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER))
+ /* Wa_14017856879 */
+ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
+
+ if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+ IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ /*
+ * Wa_14017066071
+ * Wa_14017654203
+ */
+ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+ MTL_DISABLE_SAMPLER_SC_OOO);
+
+ if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ /* Wa_22015279794 */
+ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
+ DISABLE_PREFETCH_INTO_IC);
+
if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
return a >= b;
}
-static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
+static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt,
+ struct intel_timeline *tl)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
if (IS_ERR(obj))
return PTR_ERR(obj);
- w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+ /* keep the same cache settings as timeline */
+ i915_gem_object_set_cache_coherency(obj, tl->hwsp_ggtt->obj->cache_level);
+ w->map = i915_gem_object_pin_map_unlocked(obj,
+ page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping));
if (IS_ERR(w->map)) {
i915_gem_object_put(obj);
return PTR_ERR(w->map);
if (!tl->has_initial_breadcrumb)
goto out_free;
+ selftest_tl_pin(tl);
+
for (i = 0; i < ARRAY_SIZE(watcher); i++) {
- err = setup_watcher(&watcher[i], gt);
+ err = setup_watcher(&watcher[i], gt, tl);
if (err)
goto out;
}
for (i = 0; i < ARRAY_SIZE(watcher); i++)
cleanup_watcher(&watcher[i]);
+ intel_timeline_unpin(tl);
+
if (igt_flush_test(gt->i915))
err = -EIO;
INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02,
INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03,
INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04,
+ INTEL_GUC_LOAD_STATUS_HWCONFIG_START = 0x05,
+ INTEL_GUC_LOAD_STATUS_HWCONFIG_DONE = 0x06,
+ INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR = 0x07,
INTEL_GUC_LOAD_STATUS_GDT_DONE = 0x10,
INTEL_GUC_LOAD_STATUS_IDT_DONE = 0x20,
INTEL_GUC_LOAD_STATUS_LAPIC_DONE = 0x30,
INTEL_GUC_LOAD_STATUS_READY = 0xF0,
};
+enum intel_bootrom_load_status {
+ INTEL_BOOTROM_STATUS_NO_KEY_FOUND = 0x13,
+ INTEL_BOOTROM_STATUS_AES_PROD_KEY_FOUND = 0x1A,
+ INTEL_BOOTROM_STATUS_RSA_FAILED = 0x50,
+ INTEL_BOOTROM_STATUS_PAVPC_FAILED = 0x73,
+ INTEL_BOOTROM_STATUS_WOPCM_FAILED = 0x74,
+ INTEL_BOOTROM_STATUS_LOADLOC_FAILED = 0x75,
+ INTEL_BOOTROM_STATUS_JUMP_PASSED = 0x76,
+ INTEL_BOOTROM_STATUS_JUMP_FAILED = 0x77,
+ INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED = 0x79,
+ INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT = 0x7A,
+ INTEL_BOOTROM_STATUS_EXCEPTION = 0x7E,
+};
+
#endif /* _ABI_GUC_ERRORS_ABI_H */
#include <linux/types.h>
struct intel_gsc_uc;
+struct intel_uncore;
int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc);
bool intel_gsc_uc_fw_init_done(struct intel_gsc_uc *gsc);
+
#endif
/** @capture: the error-state-capture module's data and objects */
struct intel_guc_state_capture *capture;
+ struct dentry *dbgfs_node;
+
/** @sched_engine: Global engine used to submit requests to GuC */
struct i915_sched_engine *sched_engine;
/**
#include "gt/intel_gt.h"
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
+#include "gt/intel_rps.h"
#include "intel_guc_fw.h"
#include "intel_guc_print.h"
#include "i915_drv.h"
/*
* Read the GuC status register (GUC_STATUS) and store it in the
* specified location; then return a boolean indicating whether
- * the value matches either of two values representing completion
- * of the GuC boot process.
+ * the value matches either completion or a known failure code.
*
* This is used for polling the GuC status in a wait_for()
* loop below.
*/
-static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
+static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool *success)
{
u32 val = intel_uncore_read(uncore, GUC_STATUS);
u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
+ u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, val);
*status = val;
- return uk_val == INTEL_GUC_LOAD_STATUS_READY;
+ switch (uk_val) {
+ case INTEL_GUC_LOAD_STATUS_READY:
+ *success = true;
+ return true;
+
+ case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH:
+ case INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH:
+ case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE:
+ case INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR:
+ case INTEL_GUC_LOAD_STATUS_DPC_ERROR:
+ case INTEL_GUC_LOAD_STATUS_EXCEPTION:
+ case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID:
+ case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID:
+ case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ *success = false;
+ return true;
+ }
+
+ switch (br_val) {
+ case INTEL_BOOTROM_STATUS_NO_KEY_FOUND:
+ case INTEL_BOOTROM_STATUS_RSA_FAILED:
+ case INTEL_BOOTROM_STATUS_PAVPC_FAILED:
+ case INTEL_BOOTROM_STATUS_WOPCM_FAILED:
+ case INTEL_BOOTROM_STATUS_LOADLOC_FAILED:
+ case INTEL_BOOTROM_STATUS_JUMP_FAILED:
+ case INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED:
+ case INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT:
+ case INTEL_BOOTROM_STATUS_EXCEPTION:
+ *success = false;
+ return true;
+ }
+
+ return false;
}
+/*
+ * Use a longer timeout for debug builds so that problems can be detected
+ * and analysed. But a shorter timeout for releases so that user's don't
+ * wait forever to find out there is a problem. Note that the only reason
+ * an end user should hit the timeout is in case of extreme thermal throttling.
+ * And a system that is that hot during boot is probably dead anyway!
+ */
+#if defined(CONFIG_DRM_I915_DEBUG_GEM)
+#define GUC_LOAD_RETRY_LIMIT 20
+#else
+#define GUC_LOAD_RETRY_LIMIT 3
+#endif
+
static int guc_wait_ucode(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_uncore *uncore = gt->uncore;
+ ktime_t before, after, delta;
+ bool success;
u32 status;
- int ret;
+ int ret, count;
+ u64 delta_ms;
+ u32 before_freq;
/*
* Wait for the GuC to start up.
- * NB: Docs recommend not using the interrupt for completion.
+ *
* Measurements indicate this should take no more than 20ms
* (assuming the GT clock is at maximum frequency). So, a
* timeout here indicates that the GuC has failed and is unusable.
* issues to be resolved. In the meantime bump the timeout to
* 200ms. Even at slowest clock, this should be sufficient. And
* in the working case, a larger timeout makes no difference.
+ *
+ * IFWI updates have also been seen to cause sporadic failures due to
+ * the requested frequency not being granted and thus the firmware
+ * load is attempted at minimum frequency. That can lead to load times
+ * in the seconds range. However, there is a limit on how long an
+ * individual wait_for() can wait. So wrap it in a loop.
*/
- ret = wait_for(guc_ready(uncore, &status), 200);
- if (ret) {
- guc_info(guc, "load failed: status = 0x%08X\n", status);
- guc_info(guc, "load failed: status: Reset = %d, "
- "BootROM = 0x%02X, UKernel = 0x%02X, "
- "MIA = 0x%02X, Auth = 0x%02X\n",
- REG_FIELD_GET(GS_MIA_IN_RESET, status),
- REG_FIELD_GET(GS_BOOTROM_MASK, status),
- REG_FIELD_GET(GS_UKERNEL_MASK, status),
- REG_FIELD_GET(GS_MIA_MASK, status),
- REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
-
- if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+ before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps);
+ before = ktime_get();
+ for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) {
+ ret = wait_for(guc_load_done(uncore, &status, &success), 1000);
+ if (!ret || !success)
+ break;
+
+ guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz\n",
+ count, intel_rps_read_actual_frequency(&uncore->gt->rps));
+ }
+ after = ktime_get();
+ delta = ktime_sub(after, before);
+ delta_ms = ktime_to_ms(delta);
+ if (ret || !success) {
+ u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status);
+ u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
+
+ guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n",
+ status, delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), ret);
+ guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ bootrom, ukernel,
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+ switch (bootrom) {
+ case INTEL_BOOTROM_STATUS_NO_KEY_FOUND:
+ guc_info(guc, "invalid key requested, header = 0x%08X\n",
+ intel_uncore_read(uncore, GUC_HEADER_INFO));
+ ret = -ENOEXEC;
+ break;
+
+ case INTEL_BOOTROM_STATUS_RSA_FAILED:
guc_info(guc, "firmware signature verification failed\n");
ret = -ENOEXEC;
+ break;
}
- if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == INTEL_GUC_LOAD_STATUS_EXCEPTION) {
+ switch (ukernel) {
+ case INTEL_GUC_LOAD_STATUS_EXCEPTION:
guc_info(guc, "firmware exception. EIP: %#x\n",
intel_uncore_read(uncore, SOFT_SCRATCH(13)));
ret = -ENXIO;
+ break;
+
+ case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ guc_info(guc, "illegal register in save/restore workaround list\n");
+ ret = -EPERM;
+ break;
+
+ case INTEL_GUC_LOAD_STATUS_HWCONFIG_START:
+ guc_info(guc, "still extracting hwconfig table.\n");
+ ret = -ETIMEDOUT;
+ break;
}
+
+ /* Uncommon/unexpected error, see earlier status code print for details */
+ if (ret == 0)
+ ret = -ENXIO;
+ } else if (delta_ms > 200) {
+ guc_warn(guc, "excessive init time: %lldms! [freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d]\n",
+ delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps),
+ before_freq, status, count, ret);
+ } else {
+ guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+ delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps),
+ before_freq, status, count, ret);
}
return ret;
return false;
}
-/**
+/*
* intel_guc_hwconfig_init - Initialize the HWConfig
*
* Retrieve the HWConfig table from the GuC and save it locally.
return 0;
}
-/**
+/*
* intel_gt_init_hwconfig - Initialize the HWConfig if available
*
* Retrieve the HWConfig table if available on the current platform.
return guc_hwconfig_init(gt);
}
-/**
+/*
* intel_gt_fini_hwconfig - Finalize the HWConfig
*
* Free up the memory allocation holding the table.
*/
n_subbufs = 8;
+ if (!guc->dbgfs_node)
+ return -ENOENT;
+
guc_log_relay_chan = relay_open("guc_log",
- i915->drm.primary->debugfs_root,
+ guc->dbgfs_node,
subbuf_size, n_subbufs,
&relay_callbacks, i915);
if (!guc_log_relay_chan) {
#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT)
#define GS_BOOTROM_SHIFT 1
#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT)
-#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT)
-#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT)
#define GS_UKERNEL_SHIFT 8
#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT)
#define GS_MIA_SHIFT 16
#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT)
#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT)
+#define GUC_HEADER_INFO _MMIO(0xc014)
+
#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4)
#define SOFT_SCRATCH_COUNT 16
i915_sw_fence_fini(&huc->delayed_load.fence);
}
+int intel_huc_sanitize(struct intel_huc *huc)
+{
+ delayed_huc_load_complete(huc);
+ intel_uc_fw_sanitize(&huc->fw);
+ return 0;
+}
+
static bool vcs_supported(struct intel_gt *gt)
{
intel_engine_mask_t mask = gt->info.engine_mask;
} delayed_load;
};
+int intel_huc_sanitize(struct intel_huc *huc);
void intel_huc_init_early(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc);
void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
-static inline int intel_huc_sanitize(struct intel_huc *huc)
-{
- intel_uc_fw_sanitize(&huc->fw);
- return 0;
-}
-
static inline bool intel_huc_is_supported(struct intel_huc *huc)
{
return intel_uc_fw_is_supported(&huc->fw);
if (IS_ERR(root))
return;
+ uc->guc.dbgfs_node = root;
+
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), uc);
intel_guc_debugfs_register(&uc->guc, root);
static int i915_wedged_get(void *data, u64 *val)
{
struct drm_i915_private *i915 = data;
+ struct intel_gt *gt;
+ unsigned int i;
- return intel_gt_debugfs_reset_show(to_gt(i915), val);
+ *val = 0;
+
+ for_each_gt(gt, i915, i) {
+ int ret;
+
+ ret = intel_gt_debugfs_reset_show(gt, val);
+ if (ret)
+ return ret;
+
+ /* at least one tile should be wedged */
+ if (*val)
+ break;
+ }
+
+ return 0;
}
static int i915_wedged_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
- intel_gt_debugfs_reset_store(to_gt(i915), val);
+ struct intel_gt *gt;
+ unsigned int i;
+
+ for_each_gt(gt, i915, i)
+ intel_gt_debugfs_reset_store(gt, val);
return 0;
}
static int i915_forcewake_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
- intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915));
+ struct intel_gt *gt;
+ unsigned int i;
+
+ for_each_gt(gt, i915, i)
+ intel_gt_pm_debugfs_forcewake_user_open(gt);
return 0;
}
static int i915_forcewake_release(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
- intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915));
+ struct intel_gt *gt;
+ unsigned int i;
+
+ for_each_gt(gt, i915, i)
+ intel_gt_pm_debugfs_forcewake_user_release(gt);
return 0;
}
if (ret)
return ret;
- i915_perf_init(dev_priv);
+ ret = i915_perf_init(dev_priv);
+ if (ret)
+ return ret;
ret = i915_ggtt_probe_hw(dev_priv);
if (ret)
{
struct drm_file *file = f->private_data;
struct drm_i915_file_private *file_priv = file->driver_priv;
- struct drm_i915_private *i915 = file_priv->dev_priv;
+ struct drm_i915_private *i915 = file_priv->i915;
struct i915_drm_client *client = file_priv->client;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
unsigned int i;
(INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
#define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \
(INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits)
+#define HAS_OAM(dev_priv) \
+ (INTEL_INFO(dev_priv)->has_oam)
/*
* Set this flag, when platform requires 64K GTT page sizes or larger for
struct i915_drm_client;
struct drm_i915_file_private {
- struct drm_i915_private *dev_priv;
+ struct drm_i915_private *i915;
union {
struct drm_file *file;
}
/**
- * Reads data from the object referenced by handle.
+ * i915_gem_pread_ioctl - Reads data from the object referenced by handle.
* @dev: drm device pointer
* @data: ioctl data blob
* @file: drm file pointer
}
/**
- * This is the fast pwrite path, where we copy the data directly from the
+ * i915_gem_gtt_pwrite_fast - This is the fast pwrite path, where we copy the data directly from the
* user into the GTT, uncached.
* @obj: i915 GEM object
* @args: pwrite arguments structure
}
/**
- * Writes data to the object referenced by handle.
+ * i915_gem_pwrite_ioctl - Writes data to the object referenced by handle.
* @dev: drm device
* @data: ioctl data blob
* @file: drm file
}
/**
- * Called when user space has done writes to this buffer
+ * i915_gem_sw_finish_ioctl - Called when user space has done writes to this buffer
* @dev: drm device
* @data: ioctl data blob
* @file: drm file
}
file->driver_priv = file_priv;
- file_priv->dev_priv = i915;
+ file_priv->i915 = i915;
file_priv->file = file;
file_priv->client = client;
value = INTEL_INFO(i915)->has_coherent_ggtt;
break;
case I915_PARAM_PERF_REVISION:
- value = i915_perf_ioctl_version();
+ value = i915_perf_ioctl_version(i915);
break;
case I915_PARAM_OA_TIMESTAMP_FREQUENCY:
value = i915_perf_oa_timestamp_frequency(i915);
.has_mslice_steering = 1, \
.has_oa_bpc_reporting = 1, \
.has_oa_slice_contrib_limits = 1, \
+ .has_oam = 1, \
.has_rc6 = 1, \
.has_reset_engine = 1, \
.has_rps = 1, \
*/
#include <linux/anon_inodes.h>
+#include <linux/nospec.h>
#include <linux/sizes.h>
#include <linux/uuid.h>
#include "gt/intel_gt_regs.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
+#include "gt/intel_rc6.h"
#include "gt/intel_ring.h"
#include "gt/uc/intel_guc_slpc.h"
[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 },
+ [I915_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, TYPE_OAM, HDR_64_BIT },
+ [I915_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, TYPE_OAM, HDR_64_BIT },
+};
+
+static const u32 mtl_oa_base[] = {
+ [PERF_GROUP_OAM_SAMEDIA_0] = 0x393000,
};
#define SAMPLE_OA_REPORT (1<<0)
kfree(oa_bo);
}
+static inline const
+struct i915_perf_regs *__oa_regs(struct i915_perf_stream *stream)
+{
+ return &stream->engine->oa_group->regs;
+}
+
static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+ return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) &
GEN12_OAG_OATAILPTR_MASK;
}
return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}
+#define oa_report_header_64bit(__s) \
+ ((__s)->oa_buffer.format->header == HDR_64_BIT)
+
+static u64 oa_report_id(struct i915_perf_stream *stream, void *report)
+{
+ return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+}
+
+static u64 oa_report_reason(struct i915_perf_stream *stream, void *report)
+{
+ return (oa_report_id(stream, report) >> OAREPORT_REASON_SHIFT) &
+ (GRAPHICS_VER(stream->perf->i915) == 12 ?
+ OAREPORT_REASON_MASK_EXTENDED :
+ OAREPORT_REASON_MASK);
+}
+
+static void oa_report_id_clear(struct i915_perf_stream *stream, u32 *report)
+{
+ if (oa_report_header_64bit(stream))
+ *(u64 *)report = 0;
+ else
+ *report = 0;
+}
+
+static bool oa_report_ctx_invalid(struct i915_perf_stream *stream, void *report)
+{
+ return !(oa_report_id(stream, report) &
+ stream->perf->gen8_valid_ctx_bit) &&
+ GRAPHICS_VER(stream->perf->i915) <= 11;
+}
+
+static u64 oa_timestamp(struct i915_perf_stream *stream, void *report)
+{
+ return oa_report_header_64bit(stream) ?
+ *((u64 *)report + 1) :
+ *((u32 *)report + 1);
+}
+
+static void oa_timestamp_clear(struct i915_perf_stream *stream, u32 *report)
+{
+ if (oa_report_header_64bit(stream))
+ *(u64 *)&report[2] = 0;
+ else
+ report[1] = 0;
+}
+
+static u32 oa_context_id(struct i915_perf_stream *stream, u32 *report)
+{
+ u32 ctx_id = oa_report_header_64bit(stream) ? report[4] : report[2];
+
+ return ctx_id & stream->specific_ctx_id_mask;
+}
+
+static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report)
+{
+ if (oa_report_header_64bit(stream))
+ report[4] = INVALID_CTX_ID;
+ else
+ report[2] = INVALID_CTX_ID;
+}
+
/**
* oa_buffer_check_unlocked - check for data and update tail ptr state
* @stream: i915 stream instance
bool pollin;
u32 hw_tail;
u64 now;
+ u32 partial_report_size;
/* We have to consider the (unlikely) possibility that read() errors
* could result in an OA buffer reset which might reset the head and
hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
- /* The tail pointer increases in 64 byte increments,
- * not in report_size steps...
+ /* The tail pointer increases in 64 byte increments, not in report_size
+ * steps. Also the report size may not be a power of 2. Compute
+ * potentially partially landed report in the OA buffer
*/
- hw_tail &= ~(report_size - 1);
+ partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
+ partial_report_size %= report_size;
+
+ /* Subtract partial amount off the tail */
+ hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);
now = ktime_get_mono_fast_ns();
hw_tail -= gtt_offset;
tail = hw_tail;
- /* Walk the stream backward until we find a report with dword 0
- * & 1 not at 0. Since the circular buffer pointers progress by
- * increments of 64 bytes and that reports can be up to 256
- * bytes long, we can't tell whether a report has fully landed
- * in memory before the first 2 dwords of the following report
- * have effectively landed.
+ /* Walk the stream backward until we find a report with report
+ * id and timestmap not at 0. Since the circular buffer pointers
+ * progress by increments of 64 bytes and that reports can be up
+ * to 256 bytes long, we can't tell whether a report has fully
+ * landed in memory before the report id and timestamp of the
+ * following report have effectively landed.
*
* This is assuming that the writes of the OA unit land in
* memory in the order they were written to.
* If not : (╯°□°)╯︵ ┻━┻
*/
while (OA_TAKEN(tail, aged_tail) >= report_size) {
- u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail);
+ void *report = stream->oa_buffer.vaddr + tail;
- if (report32[0] != 0 || report32[1] != 0)
+ if (oa_report_id(stream, report) ||
+ oa_timestamp(stream, report))
break;
tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
{
int report_size = stream->oa_buffer.format->size;
struct drm_i915_perf_record_header header;
+ int report_size_partial;
+ u8 *oa_buf_end;
header.type = DRM_I915_PERF_RECORD_SAMPLE;
header.pad = 0;
return -EFAULT;
buf += sizeof(header);
- if (copy_to_user(buf, report, report_size))
+ oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE;
+ report_size_partial = oa_buf_end - report;
+
+ if (report_size_partial < report_size) {
+ if (copy_to_user(buf, report, report_size_partial))
+ return -EFAULT;
+ buf += report_size_partial;
+
+ if (copy_to_user(buf, stream->oa_buffer.vaddr,
+ report_size - report_size_partial))
+ return -EFAULT;
+ } else if (copy_to_user(buf, report, report_size)) {
return -EFAULT;
+ }
(*offset) += header.size;
* An out of bounds or misaligned head or tail pointer implies a driver
* bug since we validate + align the tail pointers we read from the
* hardware and we are in full control of the head pointer which should
- * only be incremented by multiples of the report size (notably also
- * all a power of two).
+ * only be incremented by multiples of the report size.
*/
if (drm_WARN_ONCE(&uncore->i915->drm,
- head > OA_BUFFER_SIZE || head % report_size ||
- tail > OA_BUFFER_SIZE || tail % report_size,
+ head > OA_BUFFER_SIZE ||
+ tail > OA_BUFFER_SIZE,
"Inconsistent OA buffer pointers: head = %u, tail = %u\n",
head, tail))
return -EIO;
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
u32 ctx_id;
- u32 reason;
-
- /*
- * All the report sizes factor neatly into the buffer
- * size so we never expect to see a report split
- * between the beginning and end of the buffer.
- *
- * Given the initial alignment check a misalignment
- * here would imply a driver bug that would result
- * in an overrun.
- */
- if (drm_WARN_ON(&uncore->i915->drm,
- (OA_BUFFER_SIZE - head) < report_size)) {
- drm_err(&uncore->i915->drm,
- "Spurious OA head ptr: non-integral report offset\n");
- break;
- }
+ u64 reason;
/*
* The reason field includes flags identifying what
* triggered this specific report (mostly timer
* triggered or e.g. due to a context switch).
*
- * This field is never expected to be zero so we can
- * check that the report isn't invalid before copying
- * it to userspace...
+ * In MMIO triggered reports, some platforms do not set the
+ * reason bit in this field and it is valid to have a reason
+ * field of zero.
*/
- reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
- (GRAPHICS_VER(stream->perf->i915) == 12 ?
- OAREPORT_REASON_MASK_EXTENDED :
- OAREPORT_REASON_MASK));
-
- ctx_id = report32[2] & stream->specific_ctx_id_mask;
+ reason = oa_report_reason(stream, report);
+ ctx_id = oa_context_id(stream, report32);
/*
* Squash whatever is in the CTX_ID field if it's marked as
* Note: that we don't clear the valid_ctx_bit so userspace can
* understand that the ID has been squashed by the kernel.
*/
- if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
- GRAPHICS_VER(stream->perf->i915) <= 11)
- ctx_id = report32[2] = INVALID_CTX_ID;
+ if (oa_report_ctx_invalid(stream, report)) {
+ ctx_id = INVALID_CTX_ID;
+ oa_context_id_squash(stream, report32);
+ }
/*
* NB: For Gen 8 the OA unit no longer supports clock gating
*/
if (stream->ctx &&
stream->specific_ctx_id != ctx_id) {
- report32[2] = INVALID_CTX_ID;
+ oa_context_id_squash(stream, report32);
}
ret = append_oa_sample(stream, buf, count, offset,
}
/*
- * Clear out the first 2 dword as a mean to detect unlanded
+ * Clear out the report id and timestamp as a means to detect unlanded
* reports.
*/
- report32[0] = 0;
- report32[1] = 0;
+ oa_report_id_clear(stream, report32);
+ oa_timestamp_clear(stream, report32);
}
if (start_offset != *offset) {
i915_reg_t oaheadptr;
oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
- GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
+ __oa_regs(stream)->oa_head_ptr :
+ GEN8_OAHEADPTR;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
return -EIO;
oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
- GEN12_OAG_OASTATUS : GEN8_OASTATUS;
+ __oa_regs(stream)->oa_status :
+ GEN8_OASTATUS;
oastatus = intel_uncore_read(uncore, oastatus_reg);
i915_vma_unpin_and_release(&stream->noa_wait, 0);
}
+static bool engine_supports_oa(const struct intel_engine_cs *engine)
+{
+ return engine->oa_group;
+}
+
+static bool engine_supports_oa_format(struct intel_engine_cs *engine, int type)
+{
+ return engine->oa_group && engine->oa_group->type == type;
+}
+
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct i915_perf *perf = stream->perf;
struct intel_gt *gt = stream->engine->gt;
+ struct i915_perf_group *g = stream->engine->oa_group;
- if (WARN_ON(stream != gt->perf.exclusive_stream))
+ if (WARN_ON(stream != g->exclusive_stream))
return;
/*
*
* See i915_oa_init_reg_state() and lrc_configure_all_contexts()
*/
- WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+ WRITE_ONCE(g->exclusive_stream, NULL);
perf->ops.disable_metric_set(stream);
free_oa_buffer(stream);
/*
* Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6.
*/
- if (intel_uc_uses_guc_rc(>->uc) &&
- (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)))
+ if (stream->override_gucrc)
drm_WARN_ON(>->i915->drm,
intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc));
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
- intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0);
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr,
gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = gtt_offset;
* to enable proper functionality of the overflow
* bit."
*/
- intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset |
OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
- intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
return err;
}
-static int gen8_configure_context(struct i915_gem_context *ctx,
+static int gen8_configure_context(struct i915_perf_stream *stream,
+ struct i915_gem_context *ctx,
struct flex *flex, unsigned int count)
{
struct i915_gem_engines_iter it;
spin_unlock(&i915->gem.contexts.lock);
- err = gen8_configure_context(ctx, regs, num_regs);
+ err = gen8_configure_context(stream, ctx, regs, num_regs);
if (err) {
i915_gem_context_put(ctx);
return err;
},
};
+ if (stream->engine->class != RENDER_CLASS)
+ return 0;
+
return oa_configure_all_contexts(stream,
regs, ARRAY_SIZE(regs),
active);
_MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
}
- intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_debug,
/* Disable clk ratio reports, like previous Gens. */
_MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
*/
oag_report_ctx_switches(stream));
- intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ?
(GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
(period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
static void gen12_oa_enable(struct i915_perf_stream *stream)
{
- struct intel_uncore *uncore = stream->uncore;
- u32 report_format = stream->oa_buffer.format->format;
+ const struct i915_perf_regs *regs;
+ u32 val;
/*
* If we don't want OA reports from the OA buffer, then we don't even
gen12_init_oa_buffer(stream);
- intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
- (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
- GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
+ regs = __oa_regs(stream);
+ val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) |
+ GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE;
+
+ intel_uncore_write(stream->uncore, regs->oa_ctrl, val);
}
/**
{
struct intel_uncore *uncore = stream->uncore;
- intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0);
if (intel_wait_for_register(uncore,
- GEN12_OAG_OACONTROL,
+ __oa_regs(stream)->oa_ctrl,
GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
50))
drm_err(&stream->perf->i915->drm,
{
struct drm_i915_private *i915 = stream->perf->i915;
struct i915_perf *perf = stream->perf;
+ struct i915_perf_group *g;
struct intel_gt *gt;
int ret;
return -EINVAL;
}
gt = props->engine->gt;
+ g = props->engine->oa_group;
/*
* If the sysfs metrics/ directory wasn't registered for some
* counter reports and marshal to the appropriate client
* we currently only allow exclusive access
*/
- if (gt->perf.exclusive_stream) {
+ if (g->exclusive_stream) {
drm_dbg(&stream->perf->i915->drm,
"OA unit already in use\n");
return -EBUSY;
if (ret) {
drm_dbg(&stream->perf->i915->drm,
"Unable to override gucrc mode\n");
- goto err_config;
+ goto err_gucrc;
}
+
+ stream->override_gucrc = true;
}
ret = alloc_oa_buffer(stream);
stream->ops = &i915_oa_stream_ops;
stream->engine->gt->perf.sseu = props->sseu;
- WRITE_ONCE(gt->perf.exclusive_stream, stream);
+ WRITE_ONCE(g->exclusive_stream, stream);
ret = i915_perf_stream_enable_sync(stream);
if (ret) {
return 0;
err_enable:
- WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+ WRITE_ONCE(g->exclusive_stream, NULL);
perf->ops.disable_metric_set(stream);
free_oa_buffer(stream);
err_oa_buf_alloc:
- free_oa_configs(stream);
+ if (stream->override_gucrc)
+ intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc);
+err_gucrc:
intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
intel_engine_pm_put(stream->engine);
+ free_oa_configs(stream);
+
err_config:
free_noa_wait(stream);
return;
/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
- stream = READ_ONCE(engine->gt->perf.exclusive_stream);
+ stream = READ_ONCE(engine->oa_group->exclusive_stream);
if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
gen8_update_reg_state_unlocked(ce, stream);
}
u32 n_props,
struct perf_open_properties *props)
{
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ const struct i915_oa_format *f;
u64 __user *uprop = uprops;
+ bool config_instance = false;
+ bool config_class = false;
+ bool config_sseu = false;
+ u8 class, instance;
u32 i;
int ret;
memset(props, 0, sizeof(struct perf_open_properties));
props->poll_oa_period = DEFAULT_POLL_PERIOD_NS;
- if (!n_props) {
- drm_dbg(&perf->i915->drm,
- "No i915 perf properties given\n");
- return -EINVAL;
- }
-
- /* At the moment we only support using i915-perf on the RCS. */
- props->engine = intel_engine_lookup_user(perf->i915,
- I915_ENGINE_CLASS_RENDER,
- 0);
- if (!props->engine) {
- drm_dbg(&perf->i915->drm,
- "No RENDER-capable engines\n");
- return -EINVAL;
- }
-
/* Considering that ID = 0 is reserved and assuming that we don't
* (currently) expect any configurations to ever specify duplicate
* values for a particular property ID then the last _PROP_MAX value is
* one greater than the maximum number of properties we expect to get
* from userspace.
*/
- if (n_props >= DRM_I915_PERF_PROP_MAX) {
+ if (!n_props || n_props >= DRM_I915_PERF_PROP_MAX) {
drm_dbg(&perf->i915->drm,
- "More i915 perf properties specified than exist\n");
+ "Invalid number of i915 perf properties given\n");
return -EINVAL;
}
+ /* Defaults when class:instance is not passed */
+ class = I915_ENGINE_CLASS_RENDER;
+ instance = 0;
+
for (i = 0; i < n_props; i++) {
u64 oa_period, oa_freq_hz;
u64 id, value;
props->hold_preemption = !!value;
break;
case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
- struct drm_i915_gem_context_param_sseu user_sseu;
-
if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
drm_dbg(&perf->i915->drm,
"SSEU config not supported on gfx %x\n",
"Unable to copy global sseu parameter\n");
return -EFAULT;
}
-
- ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
- if (ret) {
- drm_dbg(&perf->i915->drm,
- "Invalid SSEU configuration\n");
- return ret;
- }
- props->has_sseu = true;
+ config_sseu = true;
break;
}
case DRM_I915_PERF_PROP_POLL_OA_PERIOD:
}
props->poll_oa_period = value;
break;
- case DRM_I915_PERF_PROP_MAX:
+ case DRM_I915_PERF_PROP_OA_ENGINE_CLASS:
+ class = (u8)value;
+ config_class = true;
+ break;
+ case DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE:
+ instance = (u8)value;
+ config_instance = true;
+ break;
+ default:
MISSING_CASE(id);
return -EINVAL;
}
uprop += 2;
}
+ if ((config_class && !config_instance) ||
+ (config_instance && !config_class)) {
+ drm_dbg(&perf->i915->drm,
+ "OA engine-class and engine-instance parameters must be passed together\n");
+ return -EINVAL;
+ }
+
+ props->engine = intel_engine_lookup_user(perf->i915, class, instance);
+ if (!props->engine) {
+ drm_dbg(&perf->i915->drm,
+ "OA engine class and instance invalid %d:%d\n",
+ class, instance);
+ return -EINVAL;
+ }
+
+ if (!engine_supports_oa(props->engine)) {
+ drm_dbg(&perf->i915->drm,
+ "Engine not supported by OA %d:%d\n",
+ class, instance);
+ return -EINVAL;
+ }
+
+ /*
+ * Wa_14017512683: mtl[a0..c0): Use of OAM must be preceded with Media
+ * C6 disable in BIOS. Fail if Media C6 is enabled on steppings where OAM
+ * does not work as expected.
+ */
+ if (IS_MTL_MEDIA_STEP(props->engine->i915, STEP_A0, STEP_C0) &&
+ props->engine->oa_group->type == TYPE_OAM &&
+ intel_check_bios_c6_setup(&props->engine->gt->rc6)) {
+ drm_dbg(&perf->i915->drm,
+ "OAM requires media C6 to be disabled in BIOS\n");
+ return -EINVAL;
+ }
+
+ i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX);
+ f = &perf->oa_formats[i];
+ if (!engine_supports_oa_format(props->engine, f->type)) {
+ drm_dbg(&perf->i915->drm,
+ "Invalid OA format %d for class %d\n",
+ f->type, props->engine->class);
+ return -EINVAL;
+ }
+
+ if (config_sseu) {
+ ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
+ if (ret) {
+ drm_dbg(&perf->i915->drm,
+ "Invalid SSEU configuration\n");
+ return ret;
+ }
+ props->has_sseu = true;
+ }
+
return 0;
}
{}
};
+static const struct i915_range mtl_oam_b_counters[] = {
+ { .start = 0x393000, .end = 0x39301c }, /* GEN12_OAM_STARTTRIG1[1-8] */
+ { .start = 0x393020, .end = 0x39303c }, /* GEN12_OAM_REPORTTRIG1[1-8] */
+ { .start = 0x393040, .end = 0x39307c }, /* GEN12_OAM_CEC[0-7][0-1] */
+ { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */
+ {}
+};
+
static const struct i915_range xehp_oa_b_counters[] = {
{ .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */
{ .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
{ .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
{ .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
{ .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */
+ { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */
+ {}
};
static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
return reg_in_range_table(addr, gen12_oa_b_counters);
}
+static bool mtl_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+ if (HAS_OAM(perf->i915) &&
+ GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70))
+ return reg_in_range_table(addr, mtl_oam_b_counters);
+
+ return false;
+}
+
static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
return reg_in_range_table(addr, xehp_oa_b_counters) ||
- reg_in_range_table(addr, gen12_oa_b_counters);
+ reg_in_range_table(addr, gen12_oa_b_counters) ||
+ mtl_is_valid_oam_b_counter_addr(perf, addr);
}
static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
err = oa_config->id;
goto sysfs_err;
}
-
- mutex_unlock(&perf->metrics_lock);
+ id = oa_config->id;
drm_dbg(&perf->i915->drm,
"Added config %s id=%i\n", oa_config->uuid, oa_config->id);
+ mutex_unlock(&perf->metrics_lock);
- return oa_config->id;
+ return id;
sysfs_err:
mutex_unlock(&perf->metrics_lock);
{}
};
+static u32 num_perf_groups_per_gt(struct intel_gt *gt)
+{
+ return 1;
+}
+
+static u32 __oam_engine_group(struct intel_engine_cs *engine)
+{
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) {
+ /*
+ * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
+ * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
+ */
+ drm_WARN_ON(&engine->i915->drm,
+ engine->gt->type != GT_MEDIA);
+
+ return PERF_GROUP_OAM_SAMEDIA_0;
+ }
+
+ return PERF_GROUP_INVALID;
+}
+
+static u32 __oa_engine_group(struct intel_engine_cs *engine)
+{
+ switch (engine->class) {
+ case RENDER_CLASS:
+ return PERF_GROUP_OAG;
+
+ case VIDEO_DECODE_CLASS:
+ case VIDEO_ENHANCEMENT_CLASS:
+ return __oam_engine_group(engine);
+
+ default:
+ return PERF_GROUP_INVALID;
+ }
+}
+
+static struct i915_perf_regs __oam_regs(u32 base)
+{
+ return (struct i915_perf_regs) {
+ base,
+ GEN12_OAM_HEAD_POINTER(base),
+ GEN12_OAM_TAIL_POINTER(base),
+ GEN12_OAM_BUFFER(base),
+ GEN12_OAM_CONTEXT_CONTROL(base),
+ GEN12_OAM_CONTROL(base),
+ GEN12_OAM_DEBUG(base),
+ GEN12_OAM_STATUS(base),
+ GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
+ };
+}
+
+static struct i915_perf_regs __oag_regs(void)
+{
+ return (struct i915_perf_regs) {
+ 0,
+ GEN12_OAG_OAHEADPTR,
+ GEN12_OAG_OATAILPTR,
+ GEN12_OAG_OABUFFER,
+ GEN12_OAG_OAGLBCTXCTRL,
+ GEN12_OAG_OACONTROL,
+ GEN12_OAG_OA_DEBUG,
+ GEN12_OAG_OASTATUS,
+ GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
+ };
+}
+
+static void oa_init_groups(struct intel_gt *gt)
+{
+ int i, num_groups = gt->perf.num_perf_groups;
+
+ for (i = 0; i < num_groups; i++) {
+ struct i915_perf_group *g = >->perf.group[i];
+
+ /* Fused off engines can result in a group with num_engines == 0 */
+ if (g->num_engines == 0)
+ continue;
+
+ if (i == PERF_GROUP_OAG && gt->type != GT_MEDIA) {
+ g->regs = __oag_regs();
+ g->type = TYPE_OAG;
+ } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
+ g->regs = __oam_regs(mtl_oa_base[i]);
+ g->type = TYPE_OAM;
+ }
+ }
+}
+
+static int oa_init_gt(struct intel_gt *gt)
+{
+ u32 num_groups = num_perf_groups_per_gt(gt);
+ struct intel_engine_cs *engine;
+ struct i915_perf_group *g;
+ intel_engine_mask_t tmp;
+
+ g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
+ if (!g)
+ return -ENOMEM;
+
+ for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
+ u32 index = __oa_engine_group(engine);
+
+ engine->oa_group = NULL;
+ if (index < num_groups) {
+ g[index].num_engines++;
+ engine->oa_group = &g[index];
+ }
+ }
+
+ gt->perf.num_perf_groups = num_groups;
+ gt->perf.group = g;
+
+ oa_init_groups(gt);
+
+ return 0;
+}
+
+static int oa_init_engine_groups(struct i915_perf *perf)
+{
+ struct intel_gt *gt;
+ int i, ret;
+
+ for_each_gt(gt, perf->i915, i) {
+ ret = oa_init_gt(gt);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static void oa_init_supported_formats(struct i915_perf *perf)
{
struct drm_i915_private *i915 = perf->i915;
break;
case INTEL_DG2:
+ oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
+ oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ break;
+
case INTEL_METEORLAKE:
oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8);
+ oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8);
break;
default:
* Note: i915-perf initialization is split into an 'init' and 'register'
* phase with the i915_perf_register() exposing state to userspace.
*/
-void i915_perf_init(struct drm_i915_private *i915)
+int i915_perf_init(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;
if (perf->ops.enable_metric_set) {
struct intel_gt *gt;
- int i;
+ int i, ret;
for_each_gt(gt, i915, i)
mutex_init(>->perf.lock);
perf->i915 = i915;
+ ret = oa_init_engine_groups(perf);
+ if (ret) {
+ drm_err(&i915->drm,
+ "OA initialization failed %d\n", ret);
+ return ret;
+ }
+
oa_init_supported_formats(perf);
}
+
+ return 0;
}
static int destroy_config(int id, void *p, void *data)
void i915_perf_fini(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;
+ struct intel_gt *gt;
+ int i;
if (!perf->i915)
return;
+ for_each_gt(gt, perf->i915, i)
+ kfree(gt->perf.group);
+
idr_for_each(&perf->metrics_idr, destroy_config, perf);
idr_destroy(&perf->metrics_idr);
*
* This version number is used by userspace to detect available features.
*/
-int i915_perf_ioctl_version(void)
+int i915_perf_ioctl_version(struct drm_i915_private *i915)
{
/*
* 1: Initial version
*
* 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
* interval for the hrtimer used to check for OA data.
+ *
+ * 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and
+ * DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE
+ *
+ * 7: Add support for video decode and enhancement classes.
+ */
+
+ /*
+ * Wa_14017512683: mtl[a0..c0): Use of OAM must be preceded with Media
+ * C6 disable in BIOS. If Media C6 is enabled in BIOS, return version 6
+ * to indicate that OA media is not supported.
*/
- return 5;
+ if (IS_MTL_MEDIA_STEP(i915, STEP_A0, STEP_C0)) {
+ struct intel_gt *gt;
+ int i;
+
+ for_each_gt(gt, i915, i) {
+ if (gt->type == GT_MEDIA &&
+ intel_check_bios_c6_setup(>->rc6))
+ return 6;
+ }
+ }
+
+ return 7;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
struct intel_context;
struct intel_engine_cs;
-void i915_perf_init(struct drm_i915_private *i915);
+int i915_perf_init(struct drm_i915_private *i915);
void i915_perf_fini(struct drm_i915_private *i915);
void i915_perf_register(struct drm_i915_private *i915);
void i915_perf_unregister(struct drm_i915_private *i915);
-int i915_perf_ioctl_version(void);
+int i915_perf_ioctl_version(struct drm_i915_private *i915);
int i915_perf_sysctl_register(void);
void i915_perf_sysctl_unregister(void);
#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
#define GEN12_SQCNT1_OABPC REG_BIT(29)
+/* Gen12 OAM unit */
+#define GEN12_OAM_HEAD_POINTER_OFFSET (0x1a0)
+#define GEN12_OAM_HEAD_POINTER_MASK 0xffffffc0
+
+#define GEN12_OAM_TAIL_POINTER_OFFSET (0x1a4)
+#define GEN12_OAM_TAIL_POINTER_MASK 0xffffffc0
+
+#define GEN12_OAM_BUFFER_OFFSET (0x1a8)
+#define GEN12_OAM_BUFFER_SIZE_MASK (0x7)
+#define GEN12_OAM_BUFFER_SIZE_SHIFT (3)
+#define GEN12_OAM_BUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
+
+#define GEN12_OAM_CONTEXT_CONTROL_OFFSET (0x1bc)
+#define GEN12_OAM_CONTEXT_CONTROL_TIMER_PERIOD_SHIFT 2
+#define GEN12_OAM_CONTEXT_CONTROL_TIMER_ENABLE REG_BIT(1)
+#define GEN12_OAM_CONTEXT_CONTROL_COUNTER_RESUME REG_BIT(0)
+
+#define GEN12_OAM_CONTROL_OFFSET (0x194)
+#define GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT 1
+#define GEN12_OAM_CONTROL_COUNTER_ENABLE REG_BIT(0)
+
+#define GEN12_OAM_DEBUG_OFFSET (0x198)
+#define GEN12_OAM_DEBUG_BUFFER_SIZE_SELECT REG_BIT(12)
+#define GEN12_OAM_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6)
+#define GEN12_OAM_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5)
+#define GEN12_OAM_DEBUG_DISABLE_GO_1_0_REPORTS REG_BIT(2)
+#define GEN12_OAM_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1)
+
+#define GEN12_OAM_STATUS_OFFSET (0x19c)
+#define GEN12_OAM_STATUS_COUNTER_OVERFLOW REG_BIT(2)
+#define GEN12_OAM_STATUS_BUFFER_OVERFLOW REG_BIT(1)
+#define GEN12_OAM_STATUS_REPORT_LOST REG_BIT(0)
+
+#define GEN12_OAM_MMIO_TRG_OFFSET (0x1d0)
+
+#define GEN12_OAM_MMIO_TRG(base) \
+ _MMIO((base) + GEN12_OAM_MMIO_TRG_OFFSET)
+
+#define GEN12_OAM_HEAD_POINTER(base) \
+ _MMIO((base) + GEN12_OAM_HEAD_POINTER_OFFSET)
+#define GEN12_OAM_TAIL_POINTER(base) \
+ _MMIO((base) + GEN12_OAM_TAIL_POINTER_OFFSET)
+#define GEN12_OAM_BUFFER(base) \
+ _MMIO((base) + GEN12_OAM_BUFFER_OFFSET)
+#define GEN12_OAM_CONTEXT_CONTROL(base) \
+ _MMIO((base) + GEN12_OAM_CONTEXT_CONTROL_OFFSET)
+#define GEN12_OAM_CONTROL(base) \
+ _MMIO((base) + GEN12_OAM_CONTROL_OFFSET)
+#define GEN12_OAM_DEBUG(base) \
+ _MMIO((base) + GEN12_OAM_DEBUG_OFFSET)
+#define GEN12_OAM_STATUS(base) \
+ _MMIO((base) + GEN12_OAM_STATUS_OFFSET)
+
+#define GEN12_OAM_CEC0_0_OFFSET (0x40)
+#define GEN12_OAM_CEC7_1_OFFSET (0x7c)
+#define GEN12_OAM_CEC0_0(base) \
+ _MMIO((base) + GEN12_OAM_CEC0_0_OFFSET)
+#define GEN12_OAM_CEC7_1(base) \
+ _MMIO((base) + GEN12_OAM_CEC7_1_OFFSET)
+
+#define GEN12_OAM_STARTTRIG1_OFFSET (0x00)
+#define GEN12_OAM_STARTTRIG8_OFFSET (0x1c)
+#define GEN12_OAM_STARTTRIG1(base) \
+ _MMIO((base) + GEN12_OAM_STARTTRIG1_OFFSET)
+#define GEN12_OAM_STARTTRIG8(base) \
+ _MMIO((base) + GEN12_OAM_STARTTRIG8_OFFSET)
+
+#define GEN12_OAM_REPORTTRIG1_OFFSET (0x20)
+#define GEN12_OAM_REPORTTRIG8_OFFSET (0x3c)
+#define GEN12_OAM_REPORTTRIG1(base) \
+ _MMIO((base) + GEN12_OAM_REPORTTRIG1_OFFSET)
+#define GEN12_OAM_REPORTTRIG8(base) \
+ _MMIO((base) + GEN12_OAM_REPORTTRIG8_OFFSET)
+
+#define GEN12_OAM_PERF_COUNTER_B0_OFFSET (0x84)
+#define GEN12_OAM_PERF_COUNTER_B(base, idx) \
+ _MMIO((base) + GEN12_OAM_PERF_COUNTER_B0_OFFSET + 4 * (idx))
+
#endif /* __INTEL_PERF_OA_REGS__ */
#include <linux/wait.h>
#include <uapi/drm/i915_drm.h>
+#include "gt/intel_engine_types.h"
#include "gt/intel_sseu.h"
#include "i915_reg_defs.h"
+#include "intel_uncore.h"
#include "intel_wakeref.h"
struct drm_i915_private;
struct intel_context;
struct intel_engine_cs;
+enum {
+ PERF_GROUP_OAG = 0,
+ PERF_GROUP_OAM_SAMEDIA_0 = 0,
+
+ PERF_GROUP_MAX,
+ PERF_GROUP_INVALID = U32_MAX,
+};
+
+enum report_header {
+ HDR_32_BIT = 0,
+ HDR_64_BIT,
+};
+
+struct i915_perf_regs {
+ u32 base;
+ i915_reg_t oa_head_ptr;
+ i915_reg_t oa_tail_ptr;
+ i915_reg_t oa_buffer;
+ i915_reg_t oa_ctx_ctrl;
+ i915_reg_t oa_ctrl;
+ i915_reg_t oa_debug;
+ i915_reg_t oa_status;
+ u32 oa_ctrl_counter_format_shift;
+};
+
+enum oa_type {
+ TYPE_OAG,
+ TYPE_OAM,
+};
+
struct i915_oa_format {
u32 format;
int size;
+ int type;
+ enum report_header header;
};
struct i915_oa_reg {
* buffer should be checked for available data.
*/
u64 poll_oa_period;
+
+ /**
+ * @override_gucrc: GuC RC has been overridden for the perf stream,
+ * and we need to restore the default configuration on release.
+ */
+ bool override_gucrc;
};
/**
u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
};
+struct i915_perf_group {
+ /*
+ * @exclusive_stream: The stream currently using the OA unit. This is
+ * sometimes accessed outside a syscall associated to its file
+ * descriptor.
+ */
+ struct i915_perf_stream *exclusive_stream;
+
+ /*
+ * @num_engines: The number of engines using this OA unit.
+ */
+ u32 num_engines;
+
+ /*
+ * @regs: OA buffer register group for programming the OA unit.
+ */
+ struct i915_perf_regs regs;
+
+ /*
+ * @type: Type of OA unit - OAM, OAG etc.
+ */
+ enum oa_type type;
+};
+
struct i915_perf_gt {
/*
* Lock associated with anything below within this structure.
*/
struct intel_sseu sseu;
+ /**
+ * @num_perf_groups: number of perf groups per gt.
+ */
+ u32 num_perf_groups;
+
/*
- * @exclusive_stream: The stream currently using the OA unit. This is
- * sometimes accessed outside a syscall associated to its file
- * descriptor.
+ * @group: list of OA groups - one for each OA buffer.
*/
- struct i915_perf_stream *exclusive_stream;
+ struct i915_perf_group *group;
};
struct i915_perf {
* case we assume the system is running at the intended
* frequency. Fortunately, the read should rarely fail!
*/
- val = intel_rps_read_rpstat_fw(rps);
- if (val)
- val = intel_rps_get_cagf(rps, val);
- else
- val = rps->cur_freq;
+ val = intel_rps_read_actual_frequency_fw(rps);
+ if (!val)
+ val = intel_gpu_freq(rps, rps->cur_freq);
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
- intel_gpu_freq(rps, val), period_ns / 1000);
+ val, period_ns / 1000);
}
if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
#define DG1_GSC_HECI2_BASE 0x00259000
#define DG2_GSC_HECI1_BASE 0x00373000
#define DG2_GSC_HECI2_BASE 0x00374000
-
-
+#define MTL_GSC_HECI1_BASE 0x00116000
+#define MTL_GSC_HECI2_BASE 0x00117000
+
+#define HECI_H_CSR(base) _MMIO((base) + 0x4)
+#define HECI_H_CSR_IE REG_BIT(0)
+#define HECI_H_CSR_IS REG_BIT(1)
+#define HECI_H_CSR_IG REG_BIT(2)
+#define HECI_H_CSR_RDY REG_BIT(3)
+#define HECI_H_CSR_RST REG_BIT(4)
+
+#define HECI_H_GS1(base) _MMIO((base) + 0xc4c)
+#define HECI_H_GS1_ER_PREP REG_BIT(0)
#define HSW_GTT_CACHE_EN _MMIO(0x4024)
#define GTT_CACHE_EN_ALL 0xF0007FFF
/**
* i915_refct_sgt_init - Initialize a struct i915_refct_sgt with default ops
* @rsgt: The struct i915_refct_sgt to initialize.
- * size: The size of the underlying memory buffer.
+ * @size: The size of the underlying memory buffer.
*/
void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
{
/**
* i915_vma_insert - finds a slot for the vma in its address space
* @vma: the vma
+ * @ww: An optional struct i915_gem_ww_ctx
* @size: requested size in bytes (can be larger than the VMA)
* @alignment: required alignment
* @flags: mask of PIN_* flags to use
i915_vma_free(vma);
}
-/**
+/*
* i915_vma_destroy_locked - Remove all weak reference to the vma and put
* the initial reference.
*
func(has_mslice_steering); \
func(has_oa_bpc_reporting); \
func(has_oa_slice_contrib_limits); \
+ func(has_oam); \
func(has_one_eu_per_fuse_bit); \
func(has_pxp); \
func(has_rc6); \
/**
* intel_region_ttm_resource_alloc - Allocate memory resources from a region
* @mem: The memory region,
+ * @offset: BO offset
* @size: The requested size in bytes
* @flags: Allocation flags
*
static inline void
fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d)
{
- if (wait_ack_clear(d, FORCEWAKE_KERNEL)) {
+ if (!wait_ack_clear(d, FORCEWAKE_KERNEL))
+ return;
+
+ if (fw_ack(d) == ~0)
+ drm_err(&d->uncore->i915->drm,
+ "%s: MMIO unreliable (forcewake register returns 0xFFFFFFFF)!\n",
+ intel_uncore_forcewake_domain_to_str(d->id));
+ else
drm_err(&d->uncore->i915->drm,
"%s: timed out waiting for forcewake ack to clear.\n",
intel_uncore_forcewake_domain_to_str(d->id));
- add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */
- }
+
+ add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */
}
enum ack_type {
return 0;
}
+static int sanity_check_mmio_access(struct intel_uncore *uncore)
+{
+ struct drm_i915_private *i915 = uncore->i915;
+
+ if (GRAPHICS_VER(i915) < 8)
+ return 0;
+
+ /*
+ * Sanitycheck that MMIO access to the device is working properly. If
+ * the CPU is unable to communcate with a PCI device, BAR reads will
+ * return 0xFFFFFFFF. Let's make sure the device isn't in this state
+ * before we start trying to access registers.
+ *
+ * We use the primary GT's forcewake register as our guinea pig since
+ * it's been around since HSW and it's a masked register so the upper
+ * 16 bits can never read back as 1's if device access is operating
+ * properly.
+ *
+ * If MMIO isn't working, we'll wait up to 2 seconds to see if it
+ * recovers, then give up.
+ */
+#define COND (__raw_uncore_read32(uncore, FORCEWAKE_MT) != ~0)
+ if (wait_for(COND, 2000) == -ETIMEDOUT) {
+ drm_err(&i915->drm, "Device is non-operational; MMIO access returns 0xFFFFFFFF!\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
int intel_uncore_init_mmio(struct intel_uncore *uncore)
{
struct drm_i915_private *i915 = uncore->i915;
int ret;
+ ret = sanity_check_mmio_access(uncore);
+ if (ret)
+ return ret;
+
/*
* The boot firmware initializes local memory and assesses its health.
* If memory training fails, the punit will have been instructed to
return err;
}
-static int context_sync(struct intel_context *ce)
-{
- struct i915_request *rq;
- long timeout;
-
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- i915_request_get(rq);
- i915_request_add(rq);
-
- timeout = i915_request_wait(rq, 0, HZ / 5);
- i915_request_put(rq);
-
- return timeout < 0 ? -EIO : 0;
-}
-
-static struct i915_request *
-submit_batch(struct intel_context *ce, u64 addr)
-{
- struct i915_request *rq;
- int err;
-
- rq = intel_context_create_request(ce);
- if (IS_ERR(rq))
- return rq;
-
- err = 0;
- if (rq->engine->emit_init_breadcrumb) /* detect a hang */
- err = rq->engine->emit_init_breadcrumb(rq);
- if (err == 0)
- err = rq->engine->emit_bb_start(rq, addr, 0, 0);
-
- if (err == 0)
- i915_request_get(rq);
- i915_request_add(rq);
-
- return err ? ERR_PTR(err) : rq;
-}
-
-static u32 *spinner(u32 *batch, int i)
-{
- return batch + i * 64 / sizeof(*batch) + 4;
-}
-
-static void end_spin(u32 *batch, int i)
-{
- *spinner(batch, i) = MI_BATCH_BUFFER_END;
- wmb();
-}
-
-static int igt_cs_tlb(void *arg)
-{
- const unsigned int count = PAGE_SIZE / 64;
- const unsigned int chunk_size = count * PAGE_SIZE;
- struct drm_i915_private *i915 = arg;
- struct drm_i915_gem_object *bbe, *act, *out;
- struct i915_gem_engines_iter it;
- struct i915_address_space *vm;
- struct i915_gem_context *ctx;
- struct intel_context *ce;
- struct i915_vma *vma;
- I915_RND_STATE(prng);
- struct file *file;
- unsigned int i;
- u32 *result;
- u32 *batch;
- int err = 0;
-
- /*
- * Our mission here is to fool the hardware to execute something
- * from scratch as it has not seen the batch move (due to missing
- * the TLB invalidate).
- */
-
- file = mock_file(i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ctx = live_context(i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out_unlock;
- }
-
- vm = i915_gem_context_get_eb_vm(ctx);
- if (i915_is_ggtt(vm))
- goto out_vm;
-
- /* Create two pages; dummy we prefill the TLB, and intended */
- bbe = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(bbe)) {
- err = PTR_ERR(bbe);
- goto out_vm;
- }
-
- batch = i915_gem_object_pin_map_unlocked(bbe, I915_MAP_WC);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_put_bbe;
- }
- memset32(batch, MI_BATCH_BUFFER_END, PAGE_SIZE / sizeof(u32));
- i915_gem_object_flush_map(bbe);
- i915_gem_object_unpin_map(bbe);
-
- act = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(act)) {
- err = PTR_ERR(act);
- goto out_put_bbe;
- }
-
- /* Track the execution of each request by writing into different slot */
- batch = i915_gem_object_pin_map_unlocked(act, I915_MAP_WC);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_put_act;
- }
- for (i = 0; i < count; i++) {
- u32 *cs = batch + i * 64 / sizeof(*cs);
- u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32);
-
- GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
- cs[0] = MI_STORE_DWORD_IMM_GEN4;
- if (GRAPHICS_VER(i915) >= 8) {
- cs[1] = lower_32_bits(addr);
- cs[2] = upper_32_bits(addr);
- cs[3] = i;
- cs[4] = MI_NOOP;
- cs[5] = MI_BATCH_BUFFER_START_GEN8;
- } else {
- cs[1] = 0;
- cs[2] = lower_32_bits(addr);
- cs[3] = i;
- cs[4] = MI_NOOP;
- cs[5] = MI_BATCH_BUFFER_START;
- }
- }
-
- out = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(out)) {
- err = PTR_ERR(out);
- goto out_put_batch;
- }
- i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED);
-
- vma = i915_vma_instance(out, vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto out_put_out;
- }
-
- err = i915_vma_pin(vma, 0, 0,
- PIN_USER |
- PIN_OFFSET_FIXED |
- (vm->total - PAGE_SIZE));
- if (err)
- goto out_put_out;
- GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE);
-
- result = i915_gem_object_pin_map_unlocked(out, I915_MAP_WB);
- if (IS_ERR(result)) {
- err = PTR_ERR(result);
- goto out_put_out;
- }
-
- for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
- IGT_TIMEOUT(end_time);
- unsigned long pass = 0;
-
- if (!intel_engine_can_store_dword(ce->engine))
- continue;
-
- while (!__igt_timeout(end_time, NULL)) {
- struct i915_vm_pt_stash stash = {};
- struct i915_request *rq;
- struct i915_gem_ww_ctx ww;
- struct i915_vma_resource *vma_res;
- u64 offset;
-
- offset = igt_random_offset(&prng,
- 0, vm->total - PAGE_SIZE,
- chunk_size, PAGE_SIZE);
-
- memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
-
- vma = i915_vma_instance(bbe, vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto end;
- }
-
- i915_gem_object_lock(bbe, NULL);
- err = i915_vma_get_pages(vma);
- i915_gem_object_unlock(bbe);
- if (err)
- goto end;
-
- vma_res = i915_vma_resource_alloc();
- if (IS_ERR(vma_res)) {
- i915_vma_put_pages(vma);
- err = PTR_ERR(vma_res);
- goto end;
- }
-
- i915_gem_ww_ctx_init(&ww, false);
-retry:
- err = i915_vm_lock_objects(vm, &ww);
- if (err)
- goto end_ww;
-
- err = i915_vm_alloc_pt_stash(vm, &stash, chunk_size);
- if (err)
- goto end_ww;
-
- err = i915_vm_map_pt_stash(vm, &stash);
- if (!err)
- vm->allocate_va_range(vm, &stash, offset, chunk_size);
- i915_vm_free_pt_stash(vm, &stash);
-end_ww:
- if (err == -EDEADLK) {
- err = i915_gem_ww_ctx_backoff(&ww);
- if (!err)
- goto retry;
- }
- i915_gem_ww_ctx_fini(&ww);
- if (err) {
- kfree(vma_res);
- goto end;
- }
-
- i915_vma_resource_init_from_vma(vma_res, vma);
- /* Prime the TLB with the dummy pages */
- for (i = 0; i < count; i++) {
- vma_res->start = offset + i * PAGE_SIZE;
- vm->insert_entries(vm, vma_res, I915_CACHE_NONE,
- 0);
-
- rq = submit_batch(ce, vma_res->start);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- i915_vma_resource_fini(vma_res);
- kfree(vma_res);
- goto end;
- }
- i915_request_put(rq);
- }
- i915_vma_resource_fini(vma_res);
- i915_vma_put_pages(vma);
-
- err = context_sync(ce);
- if (err) {
- pr_err("%s: dummy setup timed out\n",
- ce->engine->name);
- kfree(vma_res);
- goto end;
- }
-
- vma = i915_vma_instance(act, vm, NULL);
- if (IS_ERR(vma)) {
- kfree(vma_res);
- err = PTR_ERR(vma);
- goto end;
- }
-
- i915_gem_object_lock(act, NULL);
- err = i915_vma_get_pages(vma);
- i915_gem_object_unlock(act);
- if (err) {
- kfree(vma_res);
- goto end;
- }
-
- i915_vma_resource_init_from_vma(vma_res, vma);
- /* Replace the TLB with target batches */
- for (i = 0; i < count; i++) {
- struct i915_request *rq;
- u32 *cs = batch + i * 64 / sizeof(*cs);
- u64 addr;
-
- vma_res->start = offset + i * PAGE_SIZE;
- vm->insert_entries(vm, vma_res, I915_CACHE_NONE, 0);
-
- addr = vma_res->start + i * 64;
- cs[4] = MI_NOOP;
- cs[6] = lower_32_bits(addr);
- cs[7] = upper_32_bits(addr);
- wmb();
-
- rq = submit_batch(ce, addr);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- i915_vma_resource_fini(vma_res);
- kfree(vma_res);
- goto end;
- }
-
- /* Wait until the context chain has started */
- if (i == 0) {
- while (READ_ONCE(result[i]) &&
- !i915_request_completed(rq))
- cond_resched();
- } else {
- end_spin(batch, i - 1);
- }
-
- i915_request_put(rq);
- }
- end_spin(batch, count - 1);
-
- i915_vma_resource_fini(vma_res);
- kfree(vma_res);
- i915_vma_put_pages(vma);
-
- err = context_sync(ce);
- if (err) {
- pr_err("%s: writes timed out\n",
- ce->engine->name);
- goto end;
- }
-
- for (i = 0; i < count; i++) {
- if (result[i] != i) {
- pr_err("%s: Write lost on pass %lu, at offset %llx, index %d, found %x, expected %x\n",
- ce->engine->name, pass,
- offset, i, result[i], i);
- err = -EINVAL;
- goto end;
- }
- }
-
- vm->clear_range(vm, offset, chunk_size);
- pass++;
- }
- }
-end:
- if (igt_flush_test(i915))
- err = -EIO;
- i915_gem_context_unlock_engines(ctx);
- i915_gem_object_unpin_map(out);
-out_put_out:
- i915_gem_object_put(out);
-out_put_batch:
- i915_gem_object_unpin_map(act);
-out_put_act:
- i915_gem_object_put(act);
-out_put_bbe:
- i915_gem_object_put(bbe);
-out_vm:
- i915_vm_put(vm);
-out_unlock:
- fput(file);
- return err;
-}
-
int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_ggtt_fill),
SUBTEST(igt_ggtt_page),
SUBTEST(igt_ggtt_misaligned_pin),
- SUBTEST(igt_cs_tlb),
};
GEM_BUG_ON(offset_in_page(to_gt(i915)->ggtt->vm.total));
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
- struct i915_engine_class_instance engines[0];
+ struct i915_engine_class_instance engines[];
} __attribute__((packed));
#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
I915_OAR_FORMAT_A32u40_A4u32_B8_C8,
I915_OA_FORMAT_A24u40_A14u32_B8_C8,
+ /* MTL OAM */
+ I915_OAM_FORMAT_MPEC8u64_B8_C8,
+ I915_OAM_FORMAT_MPEC8u32_B8_C8,
+
I915_OA_FORMAT_MAX /* non-ABI */
};
*/
DRM_I915_PERF_PROP_POLL_OA_PERIOD,
+ /**
+ * Multiple engines may be mapped to the same OA unit. The OA unit is
+ * identified by class:instance of any engine mapped to it.
+ *
+ * This parameter specifies the engine class and must be passed along
+ * with DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE.
+ *
+ * This property is available in perf revision 6.
+ */
+ DRM_I915_PERF_PROP_OA_ENGINE_CLASS,
+
+ /**
+ * This parameter specifies the engine instance and must be passed along
+ * with DRM_I915_PERF_PROP_OA_ENGINE_CLASS.
+ *
+ * This property is available in perf revision 6.
+ */
+ DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};