From: John.C.Harrison@Intel.com
To: Intel-GFX@Lists.FreeDesktop.Org
Cc: DRI-Devel@Lists.FreeDesktop.Org
Subject: [Intel-gfx] [PATCH 1/2] drm/i915: Dump error capture to kernel log
Date: Mon, 10 Apr 2023 12:25:22 -0700 [thread overview]
Message-ID: <20230410192523.2020049-2-John.C.Harrison@Intel.com> (raw)
In-Reply-To: <20230410192523.2020049-1-John.C.Harrison@Intel.com>
From: John Harrison <John.C.Harrison@Intel.com>
This is useful for getting debug information out in certain
situations, such as failing kernel selftests and CI runs that don't
log error captures. It is especially useful for things like retrieving
GuC logs as GuC operation can't be tracked by adding printk or ftrace
entries.
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
drivers/gpu/drm/i915/i915_gpu_error.c | 130 ++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gpu_error.h | 8 ++
2 files changed, 138 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f020c0086fbcd..500fec34188a0 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -2219,3 +2219,133 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err)
i915->gpu_error.first_error = ERR_PTR(err);
spin_unlock_irq(&i915->gpu_error.lock);
}
+
+void intel_klog_error_capture(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask)
+{
+ static int g_count;
+ struct drm_i915_private *i915 = gt->i915;
+ struct i915_gpu_coredump *error;
+ intel_wakeref_t wakeref;
+ size_t buf_size = PAGE_SIZE * 128;
+ size_t pos_err;
+ char *buf, *ptr, *next;
+ int l_count = g_count++;
+ int line = 0;
+
+ /* Can't allocate memory during a reset */
+ if (test_bit(I915_RESET_BACKOFF, >->reset.flags)) {
+ drm_err(>->i915->drm, "[Capture/%d.%d] Inside GT reset, skipping error capture :(\n",
+ l_count, line++);
+ return;
+ }
+
+ error = READ_ONCE(i915->gpu_error.first_error);
+ if (error) {
+ drm_err(&i915->drm, "[Capture/%d.%d] Clearing existing error capture first...\n",
+ l_count, line++);
+ i915_reset_error_state(i915);
+ }
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ error = i915_gpu_coredump(gt, engine_mask, CORE_DUMP_FLAG_NONE);
+
+ if (IS_ERR(error)) {
+ drm_err(&i915->drm, "[Capture/%d.%d] Failed to capture error capture: %ld!\n",
+ l_count, line++, PTR_ERR(error));
+ return;
+ }
+
+ buf = kvmalloc(buf_size, GFP_KERNEL);
+ if (!buf) {
+ drm_err(&i915->drm, "[Capture/%d.%d] Failed to allocate buffer for error capture!\n",
+ l_count, line++);
+ i915_gpu_coredump_put(error);
+ return;
+ }
+
+ drm_info(&i915->drm, "[Capture/%d.%d] Dumping i915 error capture for %ps...\n",
+ l_count, line++, __builtin_return_address(0));
+
+ /* Largest string length safe to print via dmesg */
+# define MAX_CHUNK 800
+
+ pos_err = 0;
+ while (1) {
+ ssize_t got = i915_gpu_coredump_copy_to_buffer(error, buf, pos_err, buf_size - 1);
+
+ if (got <= 0)
+ break;
+
+ buf[got] = 0;
+ pos_err += got;
+
+ ptr = buf;
+ while (got > 0) {
+ size_t count;
+ char tag[2];
+
+ next = strnchr(ptr, got, '\n');
+ if (next) {
+ count = next - ptr;
+ *next = 0;
+ tag[0] = '>';
+ tag[1] = '<';
+ } else {
+ count = got;
+ tag[0] = '}';
+ tag[1] = '{';
+ }
+
+ if (count > MAX_CHUNK) {
+ size_t pos;
+ char *ptr2 = ptr;
+
+ for (pos = MAX_CHUNK; pos < count; pos += MAX_CHUNK) {
+ char chr = ptr[pos];
+
+ ptr[pos] = 0;
+ drm_info(&i915->drm, "[Capture/%d.%d] }%s{\n",
+ l_count, line++, ptr2);
+ ptr[pos] = chr;
+ ptr2 = ptr + pos;
+
+ /*
+ * If spewing large amounts of data via a serial console,
+ * this can be a very slow process. So be friendly and try
+ * not to cause 'softlockup on CPU' problems.
+ */
+ cond_resched();
+ }
+
+ if (ptr2 < (ptr + count))
+ drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n",
+ l_count, line++, tag[0], ptr2, tag[1]);
+ else if (tag[0] == '>')
+ drm_info(&i915->drm, "[Capture/%d.%d] ><\n",
+ l_count, line++);
+ } else {
+ drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n",
+ l_count, line++, tag[0], ptr, tag[1]);
+ }
+
+ ptr = next;
+ got -= count;
+ if (next) {
+ ptr++;
+ got--;
+ }
+
+ /* As above. */
+ cond_resched();
+ }
+
+ if (got)
+ drm_info(&i915->drm, "[Capture/%d.%d] Got %zd bytes remaining!\n",
+ l_count, line++, got);
+ }
+
+ kvfree(buf);
+
+ drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err);
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index a91932cc65317..b14c2c9915141 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -260,6 +260,9 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+void intel_klog_error_capture(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask);
+
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
@@ -323,6 +326,11 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err);
#else
+static inline void intel_klog_error_capture(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask)
+{
+}
+
__printf(2, 3)
static inline void
i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
--
2.39.1
next prev parent reply other threads:[~2023-04-10 19:26 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-10 19:25 [Intel-gfx] [PATCH 0/2] Add support for dumping error captures via kernel logging John.C.Harrison
2023-04-10 19:25 ` John.C.Harrison [this message]
2023-04-10 19:25 ` [Intel-gfx] [PATCH 2/2] drm/i915/guc: Dump error capture to dmesg on CTB error John.C.Harrison
2023-04-10 19:50 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Add support for dumping error captures via kernel logging Patchwork
2023-04-10 19:50 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2023-04-10 19:59 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2023-04-10 21:15 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2023-04-11 14:41 ` [Intel-gfx] [PATCH 0/2] " Rodrigo Vivi
2023-04-11 16:41 ` John Harrison
2023-04-11 16:50 ` Daniel Vetter
2023-04-18 16:38 ` Rodrigo Vivi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230410192523.2020049-2-John.C.Harrison@Intel.com \
--to=john.c.harrison@intel.com \
--cc=DRI-Devel@Lists.FreeDesktop.Org \
--cc=Intel-GFX@Lists.FreeDesktop.Org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox