#ifndef _XE_GPU_COMMANDS_H_
#define _XE_GPU_COMMANDS_H_
+#include "regs/xe_reg_defs.h"
+
#define INSTR_CLIENT_SHIFT 29
#define INSTR_MI_CLIENT 0x0
#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
#define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22)
#define BLT_DEPTH_32 (3<<24)
+/* MEM_SET: blitter fill command used on PVC link copy engines (see emit_clear_link_copy) */
+#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22)
+#define PVC_MEM_SET_CMD_LEN_DW 7
+#define PVC_MS_MATRIX REG_BIT(17)
+/* Fill byte lives in [31:24] of the final dword; left at 0 to clear to zero */
+#define PVC_MS_DATA_FIELD GENMASK(31, 24)
+/* Bspec lists field as [6:0], but index alone is from [6:1] */
+#define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1)
+
#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28)
#define PIPE_CONTROL_AMFS_FLUSH (1<<25)
/* Optionally clear bo *and* CCS data in VRAM. */
if (clear) {
- fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0);
+ fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource);
if (IS_ERR(fence)) {
KUNIT_FAIL(test, "Failed to submit bo clear.\n");
return PTR_ERR(fence);
struct kunit *test)
{
struct xe_device *xe = gt_to_xe(m->gt);
- u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL;
+ u64 retval, expected = 0;
bool big = bo->size >= SZ_2M;
struct dma_fence *fence;
const char *str = big ? "Copying big bo" : "Copying small bo";
}
xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size);
- fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0);
+ fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource);
if (!sanity_fence_failed(xe, fence, big ? "Clearing sysmem big bo" :
"Clearing sysmem small bo", test)) {
retval = xe_map_rd(xe, &sysmem->vmap, 0, u64);
bb->len = 0;
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead);
- expected = 0x12345678U;
+ expected = 0;
emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4,
- expected, IS_DGFX(xe));
+ IS_DGFX(xe));
run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable",
test);
/* Clear a small bo */
kunit_info(test, "Clearing small buffer object\n");
xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
- expected = 0x224488ff;
- fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected);
+ expected = 0;
+ fence = xe_migrate_clear(m, tiny, tiny->ttm.resource);
if (sanity_fence_failed(xe, fence, "Clearing small bo", test))
goto out;
test_copy(m, tiny, test);
}
- /* Clear a big bo with a fixed value */
+ /* Clear a big bo */
kunit_info(test, "Clearing big buffer object\n");
xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
- expected = 0x11223344U;
- fence = xe_migrate_clear(m, big, big->ttm.resource, expected);
+ expected = 0;
+ fence = xe_migrate_clear(m, big, big->ttm.resource);
if (sanity_fence_failed(xe, fence, "Clearing big bo", test))
goto out;
}
} else {
if (move_lacks_source)
- fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0);
+ fence = xe_migrate_clear(gt->migrate, bo, new_mem);
else
fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem);
if (IS_ERR(fence)) {
bool has_4tile;
/** @has_range_tlb_invalidation: Has range based TLB invalidations */
bool has_range_tlb_invalidation;
+ /** @has_link_copy_engine: Whether the platform has link copy engines */
+ bool has_link_copy_engine;
} info;
/** @irq: device interrupt state */
return fence;
}
-static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
- u32 size, u32 pitch, u32 value, bool is_vram)
+/*
+ * Emit a PVC MEM_SET command into @bb that fills @size bytes at GPU address
+ * @src_ofs, in matrix mode with rows of @pitch bytes. The fill byte
+ * (PVC_MS_DATA_FIELD, [31:24] of the last dword) is left at zero, so this
+ * always clears to zero. Emits exactly PVC_MEM_SET_CMD_LEN_DW dwords.
+ */
+static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u32 size, u32 pitch)
{
+ u32 *cs = bb->cs + bb->len;
+ u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
+ u32 len = PVC_MEM_SET_CMD_LEN_DW;
+
+ *cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2);
+ *cs++ = pitch - 1; /* presumably fill width, minus one — confirm vs Bspec */
+ *cs++ = (size / pitch) - 1; /* presumably row count, minus one */
+ *cs++ = pitch - 1; /* presumably destination pitch, minus one */
+ *cs++ = lower_32_bits(src_ofs);
+ *cs++ = upper_32_bits(src_ofs);
+ *cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs);
+
+ /* Catch any mismatch between len and the dwords actually emitted */
+ XE_BUG_ON(cs - bb->cs != len + bb->len);
+
+ bb->len += len;
+}
+
+/*
+ * Emit an XY_FAST_COLOR_BLT into @bb that clears @size bytes at GPU address
+ * @src_ofs on the main copy engine. The fill-value dword is hard-coded to 0,
+ * so this always clears to zero. Platforms with GRAPHICS_VERx100 < 1250 use
+ * the shorter 11-dword form of the command.
+ * NOTE(review): parts of this function's body are elided by diff context;
+ * confirm against the full file before relying on this summary.
+ */
+static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
+ u64 src_ofs, u32 size, u32 pitch, bool is_vram)
+{
+ struct xe_device *xe = gt_to_xe(gt);
u32 *cs = bb->cs + bb->len;
u32 len = XY_FAST_COLOR_BLT_DW;
u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index);
- if (GRAPHICS_VERx100(gt->xe) < 1250)
+ if (GRAPHICS_VERx100(xe) < 1250)
len = 11;
*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
*cs++ = lower_32_bits(src_ofs);
*cs++ = upper_32_bits(src_ofs);
*cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
- *cs++ = value;
+ *cs++ = 0;
*cs++ = 0;
*cs++ = 0;
*cs++ = 0;
}
XE_BUG_ON(cs - bb->cs != len + bb->len);
+
+ bb->len += len;
+}
+
+/*
+ * Length, in dwords, of the clear command this platform emits: MEM_SET on
+ * link-copy-engine platforms (PVC), XY_FAST_COLOR_BLT otherwise. Used to
+ * size the batch buffer before emit_clear() writes into it.
+ */
+static u32 emit_clear_cmd_len(struct xe_device *xe)
+{
+ if (xe->info.has_link_copy_engine)
+ return PVC_MEM_SET_CMD_LEN_DW;
+ else
+ return XY_FAST_COLOR_BLT_DW;
+}
+
+/*
+ * Emit the platform-appropriate clear command into @bb: PVC link copy
+ * engines use MEM_SET (emit_clear_link_copy), all other platforms use
+ * XY_FAST_COLOR_BLT on the main copy engine (emit_clear_main_copy). Both
+ * paths clear the @size bytes at @src_ofs to zero; @is_vram only matters
+ * for the main-copy path. Always returns 0.
+ */
+static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u32 size, u32 pitch, bool is_vram)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe->info.has_link_copy_engine) {
+ emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
+ } else {
+ emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
+ is_vram);
+ }
return 0;
}
* @m: The migration context.
* @bo: The buffer object @dst is currently bound to.
* @dst: The dst TTM resource to be cleared.
- * @value: Clear value.
*
- * Clear the contents of @dst. On flat CCS devices,
- * the CCS metadata is cleared to zero as well on VRAM destionations.
+ * Clear the contents of @dst to zero. On flat CCS devices,
+ * the CCS metadata is cleared to zero as well on VRAM destinations.
* TODO: Eliminate the @bo argument.
*
* Return: Pointer to a dma_fence representing the last clear batch, or
*/
struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
struct xe_bo *bo,
- struct ttm_resource *dst,
- u32 value)
+ struct ttm_resource *dst)
{
bool clear_vram = mem_type_is_vram(dst->mem_type);
struct xe_gt *gt = m->gt;
batch_size = 2 +
pte_update_size(m, clear_vram, &src_it,
&clear_L0, &clear_L0_ofs, &clear_L0_pt,
- XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT);
+ emit_clear_cmd_len(xe), 0,
+ NUM_PT_PER_BLIT);
if (xe_device_has_flat_ccs(xe) && clear_vram)
batch_size += EMIT_COPY_CCS_DW;
update_idx = bb->len;
emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE,
- value, clear_vram);
+ clear_vram);
if (xe_device_has_flat_ccs(xe) && clear_vram) {
emit_copy_ccs(gt, bb, clear_L0_ofs, true,
m->cleared_vram_ofs, false, clear_L0);
struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
struct xe_bo *bo,
- struct ttm_resource *dst,
- u32 value);
+ struct ttm_resource *dst);
struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
bool has_4tile;
bool has_range_tlb_invalidation;
bool has_asid;
+ bool has_link_copy_engine;
};
#define PLATFORM(x) \
.vm_max_level = 4,
.supports_usm = true,
.has_asid = true,
+ .has_link_copy_engine = true,
};
#define MTL_MEDIA_ENGINES \
xe->info.has_flat_ccs = desc->has_flat_ccs;
xe->info.has_4tile = desc->has_4tile;
xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation;
+ xe->info.has_link_copy_engine = desc->has_link_copy_engine;
spd = subplatform_get(xe, desc);
xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE;