From: John.C.Harrison@Intel.com
To: Intel-GFX@Lists.FreeDesktop.Org
Cc: DRI-Devel@Lists.FreeDesktop.Org
Subject: [Intel-gfx] [PATCH 05/12] drm/i915/guc: Record CTB info in error logs
Date: Tue, 12 Jul 2022 16:31:29 -0700 [thread overview]
Message-ID: <20220712233136.1044951-6-John.C.Harrison@Intel.com> (raw)
In-Reply-To: <20220712233136.1044951-1-John.C.Harrison@Intel.com>
From: John Harrison <John.C.Harrison@Intel.com>
When debugging GuC communication issues, it is useful to have the CTB
info available. So add the state and buffer contents to the error
capture log.
Also, add a sub-structure for the GuC specific error capture info as
it is now becoming numerous.
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
drivers/gpu/drm/i915/i915_gpu_error.c | 59 +++++++++++++++++++++++----
drivers/gpu/drm/i915/i915_gpu_error.h | 20 +++++++--
2 files changed, 67 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5bcf36c292ebd..0e43b8dd22cf7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -683,6 +683,18 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
pdev->subsystem_device);
}
+static void err_print_guc_ctb(struct drm_i915_error_state_buf *m,
+ const char *name,
+ const struct intel_ctb_coredump *ctb)
+{
+ if (!ctb->size)
+ return;
+
+ err_printf(m, "GuC %s CTB: raw: 0x%08X, 0x%08X/%08X, cached: 0x%08X/%08X, desc = 0x%08X, buf = 0x%08X x 0x%08X\n",
+ name, ctb->raw_status, ctb->raw_head, ctb->raw_tail,
+ ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size);
+}
+
static void err_print_uc(struct drm_i915_error_state_buf *m,
const struct intel_uc_coredump *error_uc)
{
@@ -690,8 +702,12 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
intel_uc_fw_dump(&error_uc->guc_fw, &p);
intel_uc_fw_dump(&error_uc->huc_fw, &p);
- err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->timestamp);
- intel_gpu_error_print_vma(m, NULL, error_uc->guc_log);
+ err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp);
+ intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log);
+ err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence);
+ err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0);
+ err_print_guc_ctb(m, "Recv", error_uc->guc.ctb + 1);
+ intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_ctb);
}
static void err_free_sgl(struct scatterlist *sgl)
@@ -866,7 +882,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
if (error->gt) {
bool print_guc_capture = false;
- if (error->gt->uc && error->gt->uc->is_guc_capture)
+ if (error->gt->uc && error->gt->uc->guc.is_guc_capture)
print_guc_capture = true;
err_print_gt_display(m, error->gt);
@@ -1021,7 +1037,8 @@ static void cleanup_uc(struct intel_uc_coredump *uc)
{
kfree(uc->guc_fw.path);
kfree(uc->huc_fw.path);
- i915_vma_coredump_free(uc->guc_log);
+ i915_vma_coredump_free(uc->guc.vma_log);
+ i915_vma_coredump_free(uc->guc.vma_ctb);
kfree(uc);
}
@@ -1670,6 +1687,23 @@ gt_record_engines(struct intel_gt_coredump *gt,
}
}
+static void gt_record_guc_ctb(struct intel_ctb_coredump *saved,
+ const struct intel_guc_ct_buffer *ctb,
+ const void *blob_ptr, struct intel_guc *guc)
+{
+ if (!ctb || !ctb->desc)
+ return;
+
+ saved->raw_status = ctb->desc->status;
+ saved->raw_head = ctb->desc->head;
+ saved->raw_tail = ctb->desc->tail;
+ saved->head = ctb->head;
+ saved->tail = ctb->tail;
+ saved->size = ctb->size;
+ saved->desc_offset = ((void *)ctb->desc) - blob_ptr;
+ saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr;
+}
+
static struct intel_uc_coredump *
gt_record_uc(struct intel_gt_coredump *gt,
struct i915_vma_compress *compress)
@@ -1696,9 +1730,16 @@ gt_record_uc(struct intel_gt_coredump *gt,
* log times to system times (in conjunction with the error->boottime and
* gt->clock_frequency fields saved elsewhere).
*/
- error_uc->timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP);
- error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
- "GuC log buffer", compress);
+ error_uc->guc.timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP);
+ error_uc->guc.vma_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
+ "GuC log buffer", compress);
+ error_uc->guc.vma_ctb = create_vma_coredump(gt->_gt, uc->guc.ct.vma,
+ "GuC CT buffer", compress);
+ error_uc->guc.last_fence = uc->guc.ct.requests.last_fence;
+ gt_record_guc_ctb(error_uc->guc.ctb + 0, &uc->guc.ct.ctbs.send,
+ uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc);
+ gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv,
+ uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc);
return error_uc;
}
@@ -2051,9 +2092,9 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du
error->gt->uc = gt_record_uc(error->gt, compress);
if (error->gt->uc) {
if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
- error->gt->uc->is_guc_capture = true;
+ error->gt->uc->guc.is_guc_capture = true;
else
- GEM_BUG_ON(error->gt->uc->is_guc_capture);
+ GEM_BUG_ON(error->gt->uc->guc.is_guc_capture);
}
}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index d8a8b3d529e09..efc75cc2ffdb9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -125,6 +125,15 @@ struct intel_engine_coredump {
struct intel_engine_coredump *next;
};
+struct intel_ctb_coredump {
+ u32 raw_head, head;
+ u32 raw_tail, tail;
+ u32 raw_status;
+ u32 desc_offset;
+ u32 cmds_offset;
+ u32 size;
+};
+
struct intel_gt_coredump {
const struct intel_gt *_gt;
bool awake;
@@ -165,9 +174,14 @@ struct intel_gt_coredump {
struct intel_uc_coredump {
struct intel_uc_fw guc_fw;
struct intel_uc_fw huc_fw;
- struct i915_vma_coredump *guc_log;
- u32 timestamp;
- bool is_guc_capture;
+ struct guc_info {
+ struct intel_ctb_coredump ctb[2];
+ struct i915_vma_coredump *vma_ctb;
+ struct i915_vma_coredump *vma_log;
+ u32 timestamp;
+ u16 last_fence;
+ bool is_guc_capture;
+ } guc;
} *uc;
struct intel_gt_coredump *next;
--
2.36.0
next prev parent reply other threads:[~2022-07-12 23:31 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-12 23:31 [Intel-gfx] [PATCH 00/12] Random assortment of (mostly) GuC related patches John.C.Harrison
2022-07-12 23:31 ` [Intel-gfx] [PATCH 01/12] drm/i915: Remove bogus GEM_BUG_ON in unpark John.C.Harrison
2022-07-18 12:15 ` Tvrtko Ursulin
2022-07-19 0:05 ` John Harrison
2022-07-19 9:42 ` Tvrtko Ursulin
2022-07-21 0:54 ` John Harrison
2022-07-21 9:24 ` Tvrtko Ursulin
2022-07-22 19:09 ` John Harrison
2022-07-12 23:31 ` [Intel-gfx] [PATCH 02/12] drm/i915/guc: Don't call ring_is_idle in GuC submission John.C.Harrison
2022-07-18 12:26 ` Tvrtko Ursulin
2022-07-19 0:09 ` John Harrison
2022-07-19 9:49 ` Tvrtko Ursulin
2022-07-19 10:14 ` Tvrtko Ursulin
2022-07-12 23:31 ` [Intel-gfx] [PATCH 03/12] drm/i915/guc: Fix issues with live_preempt_cancel John.C.Harrison
2022-07-12 23:31 ` [Intel-gfx] [PATCH 04/12] drm/i915/guc: Add GuC <-> kernel time stamp translation information John.C.Harrison
2022-07-12 23:31 ` John.C.Harrison [this message]
2022-07-12 23:31 ` [Intel-gfx] [PATCH 06/12] drm/i915/guc: Use streaming loads to speed up dumping the guc log John.C.Harrison
2022-07-22 20:05 ` John Harrison
2022-07-12 23:31 ` [Intel-gfx] [PATCH 07/12] drm/i915/guc: Route semaphores to GuC for Gen12+ John.C.Harrison
2022-07-13 0:51 ` Matthew Brost
2022-07-15 17:21 ` Ceraolo Spurio, Daniele
2022-07-12 23:31 ` [Intel-gfx] [PATCH 08/12] drm/i915/guc: Add selftest for a hung GuC John.C.Harrison
2022-07-12 23:31 ` [Intel-gfx] [PATCH 09/12] drm/i915/selftest: Cope with not having an RCS engine John.C.Harrison
2022-07-13 0:48 ` Matthew Brost
2022-07-12 23:31 ` [Intel-gfx] [PATCH 10/12] drm/i915/guc: Support larger contexts on newer hardware John.C.Harrison
2022-07-18 12:35 ` Tvrtko Ursulin
2022-07-19 0:13 ` John Harrison
2022-07-19 9:56 ` Tvrtko Ursulin
2022-07-22 19:32 ` John Harrison
2022-07-25 11:24 ` Tvrtko Ursulin
2022-07-12 23:31 ` [Intel-gfx] [PATCH 11/12] drm/i915/guc: Don't abort on CTB_UNUSED status John.C.Harrison
2022-07-18 12:36 ` Tvrtko Ursulin
2022-07-19 0:16 ` John Harrison
2022-07-12 23:31 ` [Intel-gfx] [PATCH 12/12] drm/i915/guc: Add a helper for log buffer size John.C.Harrison
2022-07-13 0:46 ` Matthew Brost
2022-07-13 0:31 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning for Random assortment of (mostly) GuC related patches Patchwork
2022-07-13 20:09 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Random assortment of (mostly) GuC related patches (rev2) Patchwork
2022-07-13 20:27 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2022-07-14 1:41 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220712233136.1044951-6-John.C.Harrison@Intel.com \
--to=john.c.harrison@intel.com \
--cc=DRI-Devel@Lists.FreeDesktop.Org \
--cc=Intel-GFX@Lists.FreeDesktop.Org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox