From: Rodrigo Vivi <rodrigo.vivi@intel.com>
To: <intel-xe@lists.freedesktop.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Subject: [PATCH 2/2] drm/xe: Make a standalone snapshot to that survives unbind
Date: Tue, 30 Jan 2024 17:37:09 -0500 [thread overview]
Message-ID: <20240130223709.50881-2-rodrigo.vivi@intel.com> (raw)
In-Reply-To: <20240130223709.50881-1-rodrigo.vivi@intel.com>
Instead of embedding the coredump in the xe device,
let's allocate it dynamically and free it only when
requested by devcoredump.
This allows the 'data' file to be read even after the xe_device
is already gone, for instance after an unbind.
Of course, the module cannot be unloaded in the meantime, but this is
guaranteed by devcoredump holding the xe module reference.
Only after the devcoredump device is deleted will this reference
be put, and only then can the xe module be removed.
Our scripts and IGT helpers need to be adjusted to write
something to the devcoredump data file before rmmod, so that
the driver can be properly removed or reloaded.
Cc: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
drivers/gpu/drm/xe/xe_devcoredump.c | 32 ++++++++++------------------
drivers/gpu/drm/xe/xe_device_types.h | 2 +-
2 files changed, 12 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 30e7edbb8b6f..64886773b70b 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -49,11 +49,6 @@
#ifdef CONFIG_DEV_COREDUMP
-static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump)
-{
- return container_of(coredump, struct xe_device, devcoredump);
-}
-
static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
{
return &q->gt->uc.guc;
@@ -69,10 +64,6 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
struct timespec64 ts;
int i;
- /* Our device is gone already... */
- if (!data || !coredump_to_xe(coredump))
- return -ENODEV;
-
iter.data = buffer;
iter.offset = 0;
iter.start = offset;
@@ -109,10 +100,6 @@ static void xe_devcoredump_free(void *data)
struct xe_devcoredump *coredump = data;
int i;
- /* Our device is gone. Nothing to do... */
- if (!data || !coredump_to_xe(coredump))
- return;
-
xe_device_snapshot_free(coredump->snapshot.xe);
xe_guc_ct_snapshot_free(coredump->snapshot.ct);
xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
@@ -121,8 +108,8 @@ static void xe_devcoredump_free(void *data)
xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
coredump->captured = false;
- drm_info(&coredump_to_xe(coredump)->drm,
- "Xe device coredump has been deleted.\n");
+ coredump = NULL;
+ kfree(coredump);
}
static void devcoredump_snapshot(struct xe_devcoredump *coredump,
@@ -181,21 +168,24 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
void xe_devcoredump(struct xe_sched_job *job)
{
struct xe_device *xe = gt_to_xe(job->q->gt);
- struct xe_devcoredump *coredump = &xe->devcoredump;
- if (coredump->captured) {
- drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
+ if (xe->devcoredump && xe->devcoredump->captured) {
+ drm_info(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
return;
}
- coredump->captured = true;
- devcoredump_snapshot(coredump, job);
+ xe->devcoredump = kzalloc(sizeof(*xe->devcoredump), GFP_KERNEL);
+ if (!xe->devcoredump)
+ drm_err(&xe->drm, "devcoredump failed\n");
+
+ xe->devcoredump->captured = true;
+ devcoredump_snapshot(xe->devcoredump, job);
drm_info(&xe->drm, "Xe device coredump has been created\n");
drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
xe->drm.primary->index);
- dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
+ dev_coredumpm(xe->drm.dev, THIS_MODULE, xe->devcoredump, 0, GFP_KERNEL,
xe_devcoredump_read, xe_devcoredump_free);
}
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 50dac1a5b053..4372f5cc98b6 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -214,7 +214,7 @@ struct xe_device {
struct drm_device drm;
/** @devcoredump: device coredump */
- struct xe_devcoredump devcoredump;
+ struct xe_devcoredump *devcoredump;
/** @info: device info */
struct intel_device_info {
--
2.43.0
next prev parent reply other threads:[~2024-01-30 22:37 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-30 22:37 [PATCH 1/2] drm/xe: Convert xe_device_snapshot to a standalone snapshot Rodrigo Vivi
2024-01-30 22:37 ` Rodrigo Vivi [this message]
2024-01-31 8:58 ` [PATCH 2/2] drm/xe: Make a standalone snapshot to that survives unbind Jani Nikula
2024-01-31 5:02 ` ✗ CI.Patch_applied: failure for series starting with [1/2] drm/xe: Convert xe_device_snapshot to a standalone snapshot Patchwork
2024-01-31 13:44 ` [PATCH 1/2] " Souza, Jose
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240130223709.50881-2-rodrigo.vivi@intel.com \
--to=rodrigo.vivi@intel.com \
--cc=intel-xe@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox