From: John Harrison <john.c.harrison@intel.com>
To: Matthew Brost <matthew.brost@intel.com>,
<intel-xe@lists.freedesktop.org>
Subject: Re: [PATCH v3 1/4] drm/xe: Add devcoredump chunking
Date: Thu, 10 Apr 2025 10:47:26 -0700 [thread overview]
Message-ID: <f6aca466-2aae-4a67-a116-38fb93d0a274@intel.com> (raw)
In-Reply-To: <20250410040622.238389-2-matthew.brost@intel.com>
On 4/9/2025 9:06 PM, Matthew Brost wrote:
> Chunk devcoredump into 1.5G pieces to avoid hitting the kvmalloc limit
> of 2G. Simple algorithm reads 1.5G at time in xe_devcoredump_read
> callback as needed.
>
> Some memory allocations are changed to GFP_ATOMIC as they done in
> xe_devcoredump_read which holds lock in the path of reclaim. The
> allocations are small, so in practice should never fail.
>
> v2:
> - Update commit message wrt gfp atomic (John H)
>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
> ---
> drivers/gpu/drm/xe/xe_devcoredump.c | 59 ++++++++++++++++++-----
> drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 +
> drivers/gpu/drm/xe/xe_guc_hwconfig.c | 2 +-
> 3 files changed, 50 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
> index 81b9d9bb3f57..a9e618abf8ac 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
> return &q->gt->uc.guc;
> }
>
> -static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
> +static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count,
> + ssize_t start,
> struct xe_devcoredump *coredump)
> {
> struct xe_device *xe;
> @@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
> ss = &coredump->snapshot;
>
> iter.data = buffer;
> - iter.start = 0;
> + iter.start = start;
> iter.remain = count;
>
> p = drm_coredump_printer(&iter);
> @@ -168,6 +169,8 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
> ss->vm = NULL;
> }
>
> +#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G)
> +
> static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> size_t count, void *data, size_t datalen)
> {
> @@ -183,6 +186,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> /* Ensure delayed work is captured before continuing */
> flush_work(&ss->work);
>
> + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
> + xe_pm_runtime_get(gt_to_xe(ss->gt));
> +
> mutex_lock(&coredump->lock);
>
> if (!ss->read.buffer) {
> @@ -195,12 +201,26 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
> return 0;
> }
>
> + if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX ||
> + offset < ss->read.chunk_position) {
> + ss->read.chunk_position =
> + ALIGN_DOWN(offset, XE_DEVCOREDUMP_CHUNK_MAX);
> +
> + __xe_devcoredump_read(ss->read.buffer,
> + XE_DEVCOREDUMP_CHUNK_MAX,
> + ss->read.chunk_position, coredump);
> + }
> +
> byte_copied = count < ss->read.size - offset ? count :
> ss->read.size - offset;
> - memcpy(buffer, ss->read.buffer + offset, byte_copied);
> + memcpy(buffer, ss->read.buffer +
> + (offset % XE_DEVCOREDUMP_CHUNK_MAX), byte_copied);
>
> mutex_unlock(&coredump->lock);
>
> + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
> + xe_pm_runtime_put(gt_to_xe(ss->gt));
> +
> return byte_copied;
> }
>
> @@ -254,17 +274,32 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
> xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
> xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
>
> - xe_pm_runtime_put(xe);
> + ss->read.chunk_position = 0;
>
> /* Calculate devcoredump size */
> - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
> -
> - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
> - if (!ss->read.buffer)
> - return;
> + ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump);
> +
> + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) {
> + ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX,
> + GFP_USER);
> + if (!ss->read.buffer)
> + goto put_pm;
> +
> + __xe_devcoredump_read(ss->read.buffer,
> + XE_DEVCOREDUMP_CHUNK_MAX,
> + 0, coredump);
> + } else {
> + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
> + if (!ss->read.buffer)
> + goto put_pm;
> +
> + __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0,
> + coredump);
> + xe_devcoredump_snapshot_free(ss);
> + }
>
> - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
> - xe_devcoredump_snapshot_free(ss);
> +put_pm:
> + xe_pm_runtime_put(xe);
> }
>
> static void devcoredump_snapshot(struct xe_devcoredump *coredump,
> @@ -425,7 +460,7 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffi
> if (offset & 3)
> drm_printf(p, "Offset not word aligned: %zu", offset);
>
> - line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL);
> + line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC);
> if (!line_buff) {
> drm_printf(p, "Failed to allocate line buffer\n");
> return;
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> index 1a1d16a96b2d..a174385a6d83 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
> +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> @@ -66,6 +66,8 @@ struct xe_devcoredump_snapshot {
> struct {
> /** @read.size: size of devcoredump in human readable format */
> ssize_t size;
> + /** @read.chunk_position: position of devcoredump chunk */
> + ssize_t chunk_position;
> /** @read.buffer: buffer of devcoredump in human readable format */
> char *buffer;
> } read;
> diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
> index af2c817d552c..21403a250834 100644
> --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
> +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
> @@ -175,7 +175,7 @@ int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val)
> if (num_dw == 0)
> return -EINVAL;
>
> - hwconfig = kzalloc(size, GFP_KERNEL);
> + hwconfig = kzalloc(size, GFP_ATOMIC);
Did you work out what was going on here? It seems like this really
should not be happening.
John.
> if (!hwconfig)
> return -ENOMEM;
>
next prev parent reply other threads:[~2025-04-10 17:47 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-10 4:06 [PATCH v3 0/4] Large devcoredump file support Matthew Brost
2025-04-10 4:06 ` [PATCH v3 1/4] drm/xe: Add devcoredump chunking Matthew Brost
2025-04-10 17:47 ` John Harrison [this message]
2025-04-10 4:06 ` [PATCH v3 2/4] drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access Matthew Brost
2025-04-10 4:06 ` [PATCH v3 3/4] drm/print: Add drm_printer_is_full Matthew Brost
2025-04-10 12:06 ` Jani Nikula
2025-04-15 17:10 ` Matthew Brost
2025-04-16 7:14 ` Jani Nikula
2025-04-10 4:06 ` [PATCH v3 4/4] drm/xe: Abort printing coredump in VM printer output if full Matthew Brost
2025-04-10 4:11 ` ✓ CI.Patch_applied: success for Large devcoredump file support Patchwork
2025-04-10 4:11 ` ✓ CI.checkpatch: " Patchwork
2025-04-10 4:12 ` ✓ CI.KUnit: " Patchwork
2025-04-10 4:28 ` ✗ CI.Build: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f6aca466-2aae-4a67-a116-38fb93d0a274@intel.com \
--to=john.c.harrison@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.