From: "Belgaumkar, Vinay" <vinay.belgaumkar@intel.com>
To: <John.C.Harrison@Intel.com>, <Intel-GFX@Lists.FreeDesktop.Org>
Cc: DRI-Devel@Lists.FreeDesktop.Org
Subject: Re: [Intel-gfx] [PATCH v2 1/2] drm/i915: Dump error capture to kernel log
Date: Tue, 16 May 2023 12:17:08 -0700 [thread overview]
Message-ID: <696ba7f2-4353-e154-af0b-83604dda2546@intel.com> (raw)
In-Reply-To: <20230418181744.3251240-2-John.C.Harrison@Intel.com>
On 4/18/2023 11:17 AM, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> This is useful for getting debug information out in certain
> situations, such as failing kernel selftests and CI runs that don't
> log error captures. It is especially useful for things like retrieving
> GuC logs as GuC operation can't be tracked by adding printk or ftrace
> entries.
>
> v2: Add CONFIG_DRM_I915_DEBUG_GEM wrapper (review feedback by Rodrigo).
Do the CI sparse warnings hold water? With that looked at,
LGTM,
Reviewed-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
> drivers/gpu/drm/i915/i915_gpu_error.c | 132 ++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_gpu_error.h | 10 ++
> 2 files changed, 142 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index f020c0086fbcd..03d62c250c465 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -2219,3 +2219,135 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err)
> i915->gpu_error.first_error = ERR_PTR(err);
> spin_unlock_irq(&i915->gpu_error.lock);
> }
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +void intel_klog_error_capture(struct intel_gt *gt,
> + intel_engine_mask_t engine_mask)
> +{
> + static int g_count;
> + struct drm_i915_private *i915 = gt->i915;
> + struct i915_gpu_coredump *error;
> + intel_wakeref_t wakeref;
> + size_t buf_size = PAGE_SIZE * 128;
> + size_t pos_err;
> + char *buf, *ptr, *next;
> + int l_count = g_count++;
> + int line = 0;
> +
> + /* Can't allocate memory during a reset */
> + if (test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
> + drm_err(&gt->i915->drm, "[Capture/%d.%d] Inside GT reset, skipping error capture :(\n",
> + l_count, line++);
> + return;
> + }
> +
> + error = READ_ONCE(i915->gpu_error.first_error);
> + if (error) {
> + drm_err(&i915->drm, "[Capture/%d.%d] Clearing existing error capture first...\n",
> + l_count, line++);
> + i915_reset_error_state(i915);
> + }
> +
> + with_intel_runtime_pm(&i915->runtime_pm, wakeref)
> + error = i915_gpu_coredump(gt, engine_mask, CORE_DUMP_FLAG_NONE);
> +
> + if (IS_ERR(error)) {
> + drm_err(&i915->drm, "[Capture/%d.%d] Failed to capture error capture: %ld!\n",
> + l_count, line++, PTR_ERR(error));
> + return;
> + }
> +
> + buf = kvmalloc(buf_size, GFP_KERNEL);
> + if (!buf) {
> + drm_err(&i915->drm, "[Capture/%d.%d] Failed to allocate buffer for error capture!\n",
> + l_count, line++);
> + i915_gpu_coredump_put(error);
> + return;
> + }
> +
> + drm_info(&i915->drm, "[Capture/%d.%d] Dumping i915 error capture for %ps...\n",
> + l_count, line++, __builtin_return_address(0));
> +
> + /* Largest string length safe to print via dmesg */
> +# define MAX_CHUNK 800
> +
> + pos_err = 0;
> + while (1) {
> + ssize_t got = i915_gpu_coredump_copy_to_buffer(error, buf, pos_err, buf_size - 1);
> +
> + if (got <= 0)
> + break;
> +
> + buf[got] = 0;
> + pos_err += got;
> +
> + ptr = buf;
> + while (got > 0) {
> + size_t count;
> + char tag[2];
> +
> + next = strnchr(ptr, got, '\n');
> + if (next) {
> + count = next - ptr;
> + *next = 0;
> + tag[0] = '>';
> + tag[1] = '<';
> + } else {
> + count = got;
> + tag[0] = '}';
> + tag[1] = '{';
> + }
> +
> + if (count > MAX_CHUNK) {
> + size_t pos;
> + char *ptr2 = ptr;
> +
> + for (pos = MAX_CHUNK; pos < count; pos += MAX_CHUNK) {
> + char chr = ptr[pos];
> +
> + ptr[pos] = 0;
> + drm_info(&i915->drm, "[Capture/%d.%d] }%s{\n",
> + l_count, line++, ptr2);
> + ptr[pos] = chr;
> + ptr2 = ptr + pos;
> +
> + /*
> + * If spewing large amounts of data via a serial console,
> + * this can be a very slow process. So be friendly and try
> + * not to cause 'softlockup on CPU' problems.
> + */
> + cond_resched();
> + }
> +
> + if (ptr2 < (ptr + count))
> + drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n",
> + l_count, line++, tag[0], ptr2, tag[1]);
> + else if (tag[0] == '>')
> + drm_info(&i915->drm, "[Capture/%d.%d] ><\n",
> + l_count, line++);
> + } else {
> + drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n",
> + l_count, line++, tag[0], ptr, tag[1]);
> + }
> +
> + ptr = next;
> + got -= count;
> + if (next) {
> + ptr++;
> + got--;
> + }
> +
> + /* As above. */
> + cond_resched();
> + }
> +
> + if (got)
> + drm_info(&i915->drm, "[Capture/%d.%d] Got %zd bytes remaining!\n",
> + l_count, line++, got);
> + }
> +
> + kvfree(buf);
> +
> + drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err);
> +}
> +#endif
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index a91932cc65317..a78c061ce26fb 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -258,6 +258,16 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
> #define CORE_DUMP_FLAG_NONE 0x0
> #define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0)
>
> +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) && IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +void intel_klog_error_capture(struct intel_gt *gt,
> + intel_engine_mask_t engine_mask);
> +#else
> +static inline void intel_klog_error_capture(struct intel_gt *gt,
> + intel_engine_mask_t engine_mask)
> +{
> +}
> +#endif
> +
> #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
>
> __printf(2, 3)
next prev parent reply other threads:[~2023-05-16 19:17 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-18 18:17 [Intel-gfx] [PATCH v2 0/2] Add support for dumping error captures via kernel logging John.C.Harrison
2023-04-18 18:17 ` [Intel-gfx] [PATCH v2 1/2] drm/i915: Dump error capture to kernel log John.C.Harrison
2023-05-16 19:17 ` Belgaumkar, Vinay [this message]
2023-05-16 19:21 ` John Harrison
2023-05-16 20:52 ` Rodrigo Vivi
2023-05-16 22:06 ` John Harrison
2023-04-18 18:17 ` [Intel-gfx] [PATCH v2 2/2] drm/i915/guc: Dump error capture to dmesg on CTB error John.C.Harrison
2023-05-16 19:17 ` Belgaumkar, Vinay
2023-04-18 18:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Add support for dumping error captures via kernel logging (rev2) Patchwork
2023-04-18 18:58 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2023-04-18 19:08 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2023-04-18 23:46 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2023-05-16 18:54 ` [Intel-gfx] [PATCH v2 0/2] Add support for dumping error captures via kernel logging Belgaumkar, Vinay
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=696ba7f2-4353-e154-af0b-83604dda2546@intel.com \
--to=vinay.belgaumkar@intel.com \
--cc=DRI-Devel@Lists.FreeDesktop.Org \
--cc=Intel-GFX@Lists.FreeDesktop.Org \
--cc=John.C.Harrison@Intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox