From c4a2e5f865b7230a9565c57c54b375b1d2ce82f5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 23 Apr 2025 10:17:22 -0700 Subject: [PATCH] drm/xe: Add devcoredump chunking Chunk devcoredump into 1.5G pieces to avoid hitting the kvmalloc limit of 2G. Simple algorithm reads 1.5G at time in xe_devcoredump_read callback as needed. Some memory allocations are changed to GFP_ATOMIC as they done in xe_devcoredump_read which holds lock in the path of reclaim. The allocations are small, so in practice should never fail. v2: - Update commit message wrt gfp atomic (John H) v6: - Drop GFP_ATOMIC change for hwconfig (John H) Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250423171725.597955-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 59 ++++++++++++++++++----- drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 + 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 81b9d9bb3f57..a9e618abf8ac 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } -static ssize_t __xe_devcoredump_read(char *buffer, size_t count, +static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count, + ssize_t start, struct xe_devcoredump *coredump) { struct xe_device *xe; @@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, ss = &coredump->snapshot; iter.data = buffer; - iter.start = 0; + iter.start = start; iter.remain = count; p = drm_coredump_printer(&iter); @@ -168,6 +169,8 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) ss->vm = NULL; } +#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) + static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { @@ -183,6 +186,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + xe_pm_runtime_get(gt_to_xe(ss->gt)); + mutex_lock(&coredump->lock); if (!ss->read.buffer) { @@ -195,12 +201,26 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, return 0; } + if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX || + offset < ss->read.chunk_position) { + ss->read.chunk_position = + ALIGN_DOWN(offset, XE_DEVCOREDUMP_CHUNK_MAX); + + __xe_devcoredump_read(ss->read.buffer, + XE_DEVCOREDUMP_CHUNK_MAX, + ss->read.chunk_position, coredump); + } + byte_copied = count < ss->read.size - offset ? count : ss->read.size - offset; - memcpy(buffer, ss->read.buffer + offset, byte_copied); + memcpy(buffer, ss->read.buffer + + (offset % XE_DEVCOREDUMP_CHUNK_MAX), byte_copied); mutex_unlock(&coredump->lock); + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + xe_pm_runtime_put(gt_to_xe(ss->gt)); + return byte_copied; } @@ -254,17 +274,32 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); - xe_pm_runtime_put(xe); + ss->read.chunk_position = 0; /* Calculate devcoredump size */ - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); - - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); - if (!ss->read.buffer) - return; + ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump); + + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) { + ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX, + GFP_USER); + if (!ss->read.buffer) + goto put_pm; + + __xe_devcoredump_read(ss->read.buffer, + XE_DEVCOREDUMP_CHUNK_MAX, + 0, coredump); + } else { + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + goto put_pm; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0, + coredump); + xe_devcoredump_snapshot_free(ss); + } - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); - xe_devcoredump_snapshot_free(ss); +put_pm: + xe_pm_runtime_put(xe); } static void devcoredump_snapshot(struct xe_devcoredump *coredump, @@ -425,7 +460,7 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffi if (offset & 3) drm_printf(p, "Offset not word aligned: %zu", offset); - line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL); + line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC); if (!line_buff) { drm_printf(p, "Failed to allocate line buffer\n"); return; diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 1a1d16a96b2d..a174385a6d83 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -66,6 +66,8 @@ struct xe_devcoredump_snapshot { struct { /** @read.size: size of devcoredump in human readable format */ ssize_t size; + /** @read.chunk_position: position of devcoredump chunk */ + ssize_t chunk_position; /** @read.buffer: buffer of devcoredump in human readable format */ char *buffer; } read; -- 2.25.1