From: John.C.Harrison@Intel.com
To: Intel-GFX@Lists.FreeDesktop.Org
Cc: DRI-Devel@Lists.FreeDesktop.Org,
John Harrison <John.C.Harrison@Intel.com>
Subject: [PATCH v8 06/11] drm/xe/guc: Use a two stage dump for GuC logs and add more info
Date: Thu, 19 Sep 2024 20:20:01 -0700 [thread overview]
Message-ID: <20240920032007.629624-7-John.C.Harrison@Intel.com> (raw)
In-Reply-To: <20240920032007.629624-1-John.C.Harrison@Intel.com>
From: John Harrison <John.C.Harrison@Intel.com>
Split the GuC log dump into a two stage snapshot and print mechanism.
This allows the log to be captured at the point of an error (which may
be in a restricted context) and then dumped out later (from a regular
context such as a worker function or a sysfs file handler).
Also add a bunch of other useful pieces of information that can help
(or are fundamentally required!) to decode and parse the log.
v2: Add kerneldoc and fix a couple of comment typos - review feedback
from Michal W.
v3: Move chunking code to this patch as it makes the deltas simpler.
Fix a bunch of kerneldoc issues.
v4: Move the CS frequency out of the coredump snapshot function into
the debugfs only code (as that info is already part of the main
devcoredump). Add a header to the debugfs log to match the one in the
devcoredump to aid processing by a unified tool. Add forcewake to the
GuC timestamp read so it actually works.
v6: Add colon to GuC version string (review feedback by Julia F).
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
drivers/gpu/drm/xe/regs/xe_guc_regs.h | 1 +
drivers/gpu/drm/xe/xe_guc_log.c | 178 +++++++++++++++++++++++---
drivers/gpu/drm/xe/xe_guc_log.h | 4 +
drivers/gpu/drm/xe/xe_guc_log_types.h | 27 ++++
4 files changed, 195 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index a5fd14307f94..b27b73680c12 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -84,6 +84,7 @@
#define HUC_LOADING_AGENT_GUC REG_BIT(1)
#define GUC_WOPCM_OFFSET_VALID REG_BIT(0)
#define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4)
+#define GUC_PMTIMESTAMP XE_REG(0xc3e8)
#define GUC_SEND_INTERRUPT XE_REG(0xc4c8)
#define GUC_SEND_TRIGGER REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index be47780ec2a7..24564624e91e 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -6,15 +6,23 @@
#include "xe_guc_log.h"
#include <drm/drm_managed.h>
-#include <linux/vmalloc.h>
+#include "regs/xe_guc_regs.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
+#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_map.h"
+#include "xe_mmio.h"
#include "xe_module.h"
+static struct xe_guc *
+log_to_guc(struct xe_guc_log *log)
+{
+ return container_of(log, struct xe_guc, log);
+}
+
static struct xe_gt *
log_to_gt(struct xe_guc_log *log)
{
@@ -52,35 +60,175 @@ static size_t guc_log_size(void)
CAPTURE_BUFFER_SIZE;
}
+#define GUC_LOG_CHUNK_SIZE SZ_2M
+
+static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log *log, bool atomic)
+{
+ struct xe_guc_log_snapshot *snapshot;
+ size_t remain;
+ int i;
+
+ snapshot = kzalloc(sizeof(*snapshot), atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot)
+ return NULL;
+
+ /*
+ * NB: kmalloc has a hard limit well below the maximum GuC log buffer size.
+ * Also, can't use vmalloc as might be called from atomic context. So need
+ * to break the buffer up into smaller chunks that can be allocated.
+ */
+ snapshot->size = log->bo->size;
+ snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE);
+
+ snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy),
+ atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot->copy)
+ goto fail_snap;
+
+ remain = snapshot->size;
+ for (i = 0; i < snapshot->num_chunks; i++) {
+ size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
+
+ snapshot->copy[i] = kmalloc(size, atomic ? GFP_ATOMIC : GFP_KERNEL);
+ if (!snapshot->copy[i])
+ goto fail_copy;
+ remain -= size;
+ }
+
+ return snapshot;
+
+fail_copy:
+ for (i = 0; i < snapshot->num_chunks; i++)
+ kfree(snapshot->copy[i]);
+ kfree(snapshot->copy);
+fail_snap:
+ kfree(snapshot);
+ return NULL;
+}
+
/**
- * xe_guc_log_print - dump a copy of the GuC log to some useful location
+ * xe_guc_log_snapshot_free - free a previously captured GuC log snapshot
+ * @snapshot: GuC log snapshot structure
+ *
+ * Releases the chunked copy of the log and then the snapshot structure itself. Passing
+ * a NULL @snapshot is allowed and does nothing.
+ */
+void xe_guc_log_snapshot_free(struct xe_guc_log_snapshot *snapshot)
+{
+ int i;
+
+ if (!snapshot)
+ return;
+
+ if (snapshot->copy) { /* chunk table exists: release each chunk, then the table */
+ for (i = 0; i < snapshot->num_chunks; i++)
+ kfree(snapshot->copy[i]);
+ kfree(snapshot->copy);
+ }
+
+ kfree(snapshot);
+}
+
+/**
+ * xe_guc_log_snapshot_capture - create a new snapshot copy of the GuC log for later dumping
* @log: GuC log structure
- * @p: the printer object to output to
+ * @atomic: is the call inside an atomic section of some kind?
+ *
+ * Return: pointer to a newly allocated snapshot object or null if out of memory. Caller is
+ * responsible for calling xe_guc_log_snapshot_free when done with the snapshot.
*/
-void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic)
{
+ struct xe_guc_log_snapshot *snapshot;
struct xe_device *xe = log_to_xe(log);
- size_t size;
- void *copy;
+ struct xe_guc *guc = log_to_guc(log);
+ struct xe_gt *gt = log_to_gt(log);
+ size_t remain;
+ int i, err;
if (!log->bo) {
- drm_puts(p, "GuC log buffer not allocated");
- return;
+ xe_gt_err(gt, "GuC log buffer not allocated\n");
+ return NULL;
+ }
+
+ snapshot = xe_guc_log_snapshot_alloc(log, atomic);
+ if (!snapshot) {
+ xe_gt_err(gt, "GuC log snapshot not allocated\n");
+ return NULL;
}
- size = log->bo->size;
+ remain = snapshot->size;
+ for (i = 0; i < snapshot->num_chunks; i++) {
+ size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
+
+ xe_map_memcpy_from(xe, snapshot->copy[i], &log->bo->vmap,
+ i * GUC_LOG_CHUNK_SIZE, size);
+ remain -= size;
+ }
+
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err) {
+ snapshot->stamp = ~0; /* no forcewake: store all-ones instead of a register read */
+ } else {
+ snapshot->stamp = xe_mmio_read32(&gt->mmio, GUC_PMTIMESTAMP);
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
+ snapshot->ktime = ktime_get_boottime_ns();
+ snapshot->level = log->level;
+ snapshot->ver_found = guc->fw.versions.found[XE_UC_FW_VER_RELEASE];
+ snapshot->ver_want = guc->fw.versions.wanted;
+ snapshot->path = guc->fw.path;
+
+ return snapshot;
+}
+
+/**
+ * xe_guc_log_snapshot_print - dump a previously saved copy of the GuC log to some useful location
+ * @snapshot: a snapshot of the GuC log
+ * @p: the printer object to output to
+ */
+void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p)
+{
+ size_t remain;
+ int i;
- copy = vmalloc(size);
- if (!copy) {
- drm_printf(p, "Failed to allocate %zu", size);
+ if (!snapshot) {
+ drm_printf(p, "GuC log snapshot not allocated!\n");
return;
}
- xe_map_memcpy_from(xe, copy, &log->bo->vmap, 0, size);
+ drm_printf(p, "GuC firmware: %s\n", snapshot->path);
+ drm_printf(p, "GuC version: %u.%u.%u (wanted %u.%u.%u)\n",
+ snapshot->ver_found.major, snapshot->ver_found.minor, snapshot->ver_found.patch,
+ snapshot->ver_want.major, snapshot->ver_want.minor, snapshot->ver_want.patch);
+ drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", snapshot->ktime, snapshot->ktime);
+ drm_printf(p, "GuC timestamp: 0x%08X [%u]\n", snapshot->stamp, snapshot->stamp);
+ drm_printf(p, "Log level: %u\n", snapshot->level);
+
+ remain = snapshot->size;
+ for (i = 0; i < snapshot->num_chunks; i++) {
+ size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
+
+ xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size);
+ remain -= size;
+ }
+}
+
+/**
+ * xe_guc_log_print - dump a copy of the GuC log to some useful location
+ * @log: GuC log structure
+ * @p: the printer object to output to
+ */
+void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
+{
+ struct xe_guc_log_snapshot *snapshot;
- xe_print_blob_ascii85(p, "Log data", copy, 0, size);
+ drm_printf(p, "**** GuC Log ****\n");
- vfree(copy);
+ snapshot = xe_guc_log_snapshot_capture(log, false);
+ drm_printf(p, "CS reference clock: %u\n", log_to_gt(log)->info.reference_clock);
+ xe_guc_log_snapshot_print(snapshot, p);
+ xe_guc_log_snapshot_free(snapshot);
}
int xe_guc_log_init(struct xe_guc_log *log)
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
index 2d25ab28b4b3..949d2c98343d 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.h
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -9,6 +9,7 @@
#include "xe_guc_log_types.h"
struct drm_printer;
+struct xe_device;
#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
#define CRASH_BUFFER_SIZE SZ_1M
@@ -38,6 +39,9 @@ struct drm_printer;
int xe_guc_log_init(struct xe_guc_log *log);
void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
+struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic);
+void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p);
+void xe_guc_log_snapshot_free(struct xe_guc_log_snapshot *snapshot);
static inline u32
xe_guc_log_get_level(struct xe_guc_log *log)
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
index 125080d138a7..962b9edbd9eb 100644
--- a/drivers/gpu/drm/xe/xe_guc_log_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -8,8 +8,35 @@
#include <linux/types.h>
+#include "xe_uc_fw_types.h"
+
struct xe_bo;
+/**
+ * struct xe_guc_log_snapshot:
+ * Capture of the GuC log plus various state useful for decoding the log
+ */
+struct xe_guc_log_snapshot {
+ /** @size: Size in bytes of the @copy allocation */
+ size_t size;
+ /** @copy: Host memory copy of the log buffer for later dumping, split into chunks */
+ void **copy;
+ /** @num_chunks: Number of chunks within @copy */
+ int num_chunks;
+ /** @ktime: Kernel time the snapshot was taken */
+ u64 ktime;
+ /** @stamp: GuC timestamp at which the snapshot was taken */
+ u32 stamp;
+ /** @level: GuC log verbosity level */
+ u32 level;
+ /** @ver_found: GuC firmware version */
+ struct xe_uc_fw_version ver_found;
+ /** @ver_want: GuC firmware version that driver expected */
+ struct xe_uc_fw_version ver_want;
+ /** @path: Path of GuC firmware blob */
+ const char *path;
+};
+
/**
* struct xe_guc_log - GuC log
*/
--
2.46.0
next prev parent reply other threads:[~2024-09-20 3:20 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-20 3:19 [PATCH v8 00/11] drm/xe/guc: Improve GuC log dumping and add to devcoredump John.C.Harrison
2024-09-20 3:19 ` [PATCH v8 01/11] drm/xe/guc: Remove spurious line feed in debug print John.C.Harrison
2024-09-20 3:19 ` [PATCH v8 02/11] drm/xe/devcoredump: Use drm_puts and already cached local variables John.C.Harrison
2024-09-20 3:19 ` [PATCH v8 03/11] drm/xe/devcoredump: Improve section headings and add tile info John.C.Harrison
2024-09-20 3:19 ` [PATCH v8 04/11] drm/xe/devcoredump: Add ASCII85 dump helper function John.C.Harrison
2024-09-20 3:20 ` [PATCH v8 05/11] drm/xe/guc: Copy GuC log prior to dumping John.C.Harrison
2024-09-20 3:20 ` John.C.Harrison [this message]
2024-09-20 3:20 ` [PATCH v8 07/11] drm/print: Introduce drm_line_printer John.C.Harrison
2024-09-20 3:20 ` [PATCH v8 08/11] drm/xe/guc: Dead CT helper John.C.Harrison
2024-09-20 3:20 ` [PATCH v8 09/11] drm/xe/guc: Dump entire CTB on errors John.C.Harrison
2024-09-20 3:20 ` [PATCH v8 10/11] drm/xe/guc: Add GuC log to devcoredump captures John.C.Harrison
2024-09-20 3:20 ` [PATCH v8 11/11] drm/xe/guc: Add a helper function for dumping GuC log to dmesg John.C.Harrison
2024-09-20 3:22 ` [PATCH v8 00/11] drm/xe/guc: Improve GuC log dumping and add to devcoredump John Harrison
2024-09-20 7:12 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2024-09-20 7:12 ` ✗ Fi.CI.SPARSE: " Patchwork
2024-09-20 7:13 ` ✓ Fi.CI.BAT: success " Patchwork
2024-09-20 23:26 ` ✗ Fi.CI.IGT: failure " Patchwork
-- strict thread matches above, loose matches on Subject: below --
2024-09-20 3:20 [PATCH v8 00/11] " John.C.Harrison
2024-09-20 3:21 ` [PATCH v8 06/11] drm/xe/guc: Use a two stage dump for GuC logs and add more info John.C.Harrison
2024-10-01 22:17 ` Julia Filipchuk
2024-10-02 20:53 ` Julia Filipchuk
2024-10-02 21:26 ` John Harrison
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240920032007.629624-7-John.C.Harrison@Intel.com \
--to=john.c.harrison@intel.com \
--cc=DRI-Devel@Lists.FreeDesktop.Org \
--cc=Intel-GFX@Lists.FreeDesktop.Org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.