Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: <intel-xe@lists.freedesktop.org>, <simona.vetter@ffwll.ch>,
	<christian.koenig@amd.com>, <thomas.hellstrom@linux.intel.com>,
	<joonas.lahtinen@linux.intel.com>,
	<christoph.manszewski@intel.com>, <rodrigo.vivi@intel.com>,
	<lucas.demarchi@intel.com>, <andrzej.hajda@intel.com>,
	<matthew.auld@intel.com>, <maciej.patelczyk@intel.com>,
	<gwan-gyeong.mun@intel.com>,
	Dominik Grzegorzek <dominik.grzegorzek@intel.com>
Subject: Re: [PATCH 16/20] drm/xe/eudebug: Mark guc contexts as debuggable
Date: Mon, 6 Oct 2025 11:35:41 -0700	[thread overview]
Message-ID: <aOQL/Qz32Jwkh6J5@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <20251006111711.201906-17-mika.kuoppala@linux.intel.com>

On Mon, Oct 06, 2025 at 02:17:06PM +0300, Mika Kuoppala wrote:
> We need to inform to guc which contexts are debuggable
> as their handling is different from ordinary contexts.
> 
> Co-developed-by: Dominik Grzegorzek <dominik.grzegorzek@intel.com>
> Co-developed-by: Maciej Patelczyk <maciej.patelczyk@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/xe/abi/guc_actions_abi.h |  5 +++
>  drivers/gpu/drm/xe/xe_eudebug_hw.c       | 55 ++++++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_eudebug_hw.h       |  4 ++
>  drivers/gpu/drm/xe/xe_guc_submit.c       |  4 ++
>  4 files changed, 68 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> index 47756e4674a1..32a5f680a6d2 100644
> --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> @@ -155,6 +155,7 @@ enum xe_guc_action {
>  	XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
>  	XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
>  	XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
> +	XE_GUC_ACTION_EU_KERNEL_DEBUG = 0x8006,
>  	XE_GUC_ACTION_TEST_G2G_SEND = 0xF001,
>  	XE_GUC_ACTION_TEST_G2G_RECV = 0xF002,
>  	XE_GUC_ACTION_LIMIT
> @@ -278,4 +279,8 @@ enum xe_guc_g2g_type {
>  /* invalid type for XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR */
>  #define XE_GUC_CAT_ERR_TYPE_INVALID 0xdeadbeef
>  
> +enum  xe_guc_eu_kernel_debug_request_type {
> +	XE_GUC_EU_KERNEL_DEBUG_ENABLE = 0x3,
> +};
> +
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_eudebug_hw.c b/drivers/gpu/drm/xe/xe_eudebug_hw.c
> index a62c4b439888..cd4627705b56 100644
> --- a/drivers/gpu/drm/xe/xe_eudebug_hw.c
> +++ b/drivers/gpu/drm/xe/xe_eudebug_hw.c
> @@ -12,6 +12,7 @@
>  #include "regs/xe_gt_regs.h"
>  #include "regs/xe_engine_regs.h"
>  
> +#include "abi/guc_actions_abi.h"
>  #include "xe_eudebug.h"
>  #include "xe_eudebug_types.h"
>  #include "xe_exec_queue.h"
> @@ -20,6 +21,9 @@
>  #include "xe_gt.h"
>  #include "xe_gt_debug.h"
>  #include "xe_gt_mcr.h"
> +#include "xe_guc.h"
> +#include "xe_guc_ct.h"
> +#include "xe_guc_exec_queue_types.h"
>  #include "xe_hw_engine.h"
>  #include "xe_lrc.h"
>  #include "xe_macros.h"
> @@ -675,6 +679,57 @@ static int xe_eu_control_stopped(struct xe_eudebug *d,
>  	return xe_gt_eu_attention_bitmap(q->gt, bits, bitmask_size);
>  }
>  
> +static int xe_guc_action_eu_kernel_debug(struct xe_device *xe,
> +					 struct xe_exec_queue *q,
> +					 struct xe_lrc *lrc, u32 cmd)
> +{
> +	u32 action[] = {
> +		XE_GUC_ACTION_EU_KERNEL_DEBUG,
> +		q->guc->id,
> +		cmd,
> +		0, /* reserved */
> +	};
> +	int ret, i;
> +
> +	if (cmd != XE_GUC_EU_KERNEL_DEBUG_ENABLE)

Maybe an xe_gt_assert here instead.

> +		return -EINVAL;
> +
> +	ret = -EINVAL;
> +	for (i = 0; i < q->width; i++) {

I would double check with the GuC team if you have have enable EU
debugging on each guc_id in multi-lrc contexts. I suspect not given
register, scheduling toggles, and deregister H2G operate only on the
main GuC ID. I'm am however unsure as H2G 8006 is not in GuC spec I'm
looking at.

> +		if (lrc && q->lrc[i] != lrc)
> +			continue;
> +

The above code looks unused as LRC is always NULL.

> +		action[1] = q->guc->id + i;
> +		drm_dbg(&xe->drm, "Guc action[%u] for ctx=%d",
> +			cmd, action[1]);

Prefer xe_gt_dbg.

> +
> +		ret = xe_guc_ct_send(&q->gt->uc.guc.ct,
> +				     action, ARRAY_SIZE(action), 0, 0);
> +
> +		if (ret)
> +			drm_dbg(&xe->drm, "eudebug guc cmd %u failed with %d\n",
> +				cmd, ret);

Prefer xe_gt_dbg.

> +	}
> +
> +	return ret;
> +}
> +
> +static bool xe_guc_has_debug_contexts(struct xe_gt *gt)
> +{
> +	return GUC_FIRMWARE_VER(&gt->uc.guc) >=	MAKE_GUC_VER(70, 49, 0);
> +}
> +
> +int xe_eudebug_exec_queue_enable(struct xe_exec_queue *q)
> +{

The return value is not checked at the caller, so NULL return would be
better choice.

> +	struct xe_device *xe = gt_to_xe(q->gt);
> +
> +	if (!xe_guc_has_debug_contexts(q->gt))
> +		return 0;
> +
> +	return xe_guc_action_eu_kernel_debug(xe, q, NULL,
> +					     XE_GUC_EU_KERNEL_DEBUG_ENABLE);
> +}
> +
>  static struct xe_eudebug_eu_control_ops eu_control = {
>  	.interrupt_all = xe_eu_control_interrupt_all,
>  	.stopped = xe_eu_control_stopped,
> diff --git a/drivers/gpu/drm/xe/xe_eudebug_hw.h b/drivers/gpu/drm/xe/xe_eudebug_hw.h
> index 8f59ec574e4e..5d1df5d7dc46 100644
> --- a/drivers/gpu/drm/xe/xe_eudebug_hw.h
> +++ b/drivers/gpu/drm/xe/xe_eudebug_hw.h
> @@ -23,10 +23,14 @@ long xe_eudebug_eu_control(struct xe_eudebug *d, const u64 arg);
>  
>  struct xe_exec_queue *xe_gt_runalone_active_queue_get(struct xe_gt *gt, int *lrc_idx);
>  
> +int xe_eudebug_exec_queue_enable(struct xe_exec_queue *q);
> +

I'd probably stick this implementation xe_guc_submit.c as even though
this EU debug specific, IMO you don't really need to compile this part
out as surely if EU kconfig is unset EXEC_QUEUE_EUDEBUG_FLAG_ENABLE
would be clear. Maybe ask others about where to stick this, as my
opinion on this isn't strong either way.

Matt

>  #else /* CONFIG_DRM_XE_EUDEBUG */
>  
>  static inline void xe_eudebug_init_hw_engine(struct xe_hw_engine *hwe, bool enable) { }
>  
> +static inline int xe_eudebug_exec_queue_enable(struct xe_exec_queue *q) { return 0; }
> +
>  #endif /* CONFIG_DRM_XE_EUDEBUG */
>  
>  #endif /* _XE_EUDEBUG_HW_H_ */
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 16f78376f196..da264c1cfe76 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -21,6 +21,7 @@
>  #include "xe_assert.h"
>  #include "xe_devcoredump.h"
>  #include "xe_device.h"
> +#include "xe_eudebug_hw.h"
>  #include "xe_exec_queue.h"
>  #include "xe_force_wake.h"
>  #include "xe_gpu_scheduler.h"
> @@ -655,6 +656,9 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
>  	if (xe_exec_queue_is_lr(q))
>  		xe_exec_queue_get(q);
>  
> +	if (q->eudebug_flags & EXEC_QUEUE_EUDEBUG_FLAG_ENABLE)
> +		xe_eudebug_exec_queue_enable(q);
> +
>  	set_exec_queue_registered(q);
>  	trace_xe_exec_queue_register(q);
>  	if (xe_exec_queue_is_parallel(q))
> -- 
> 2.43.0
> 

  reply	other threads:[~2025-10-06 18:35 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-06 11:16 [PATCH 00/20] Intel Xe GPU Debug Support (eudebug) v5 Mika Kuoppala
2025-10-06 11:16 ` [PATCH 01/20] drm/xe/eudebug: Introduce eudebug interface Mika Kuoppala
2025-10-06 11:16 ` [PATCH 02/20] drm/xe/eudebug: Introduce discovery for resources Mika Kuoppala
2025-10-06 11:16 ` [PATCH 03/20] drm/xe/eudebug: Introduce exec_queue events Mika Kuoppala
2025-10-06 11:16 ` [PATCH 04/20] drm/xe: Add EUDEBUG_ENABLE exec queue property Mika Kuoppala
2025-10-06 11:16 ` [PATCH 05/20] drm/xe: Introduce ADD_DEBUG_DATA and REMOVE_DEBUG_DATA vm bind ops Mika Kuoppala
2025-10-06 11:16 ` [PATCH 06/20] drm/xe/eudebug: Introduce vm bind and vm bind debug data events Mika Kuoppala
2025-10-06 11:16 ` [PATCH 07/20] drm/xe/eudebug: Add UFENCE events with acks Mika Kuoppala
2025-10-06 11:16 ` [PATCH 08/20] drm/xe/eudebug: vm open/pread/pwrite Mika Kuoppala
2025-10-06 11:16 ` [PATCH 09/20] drm/xe/eudebug: userptr vm pread/pwrite Mika Kuoppala
2025-10-06 11:17 ` [PATCH 10/20] drm/xe/eudebug: hw enablement for eudebug Mika Kuoppala
2025-10-06 11:17 ` [PATCH 11/20] drm/xe/eudebug: Introduce EU control interface Mika Kuoppala
2025-10-06 11:17 ` [PATCH 12/20] drm/xe/eudebug: Introduce per device attention scan worker Mika Kuoppala
2025-10-06 11:17 ` [PATCH 13/20] drm/xe/eudebug_test: Introduce xe_eudebug wa kunit test Mika Kuoppala
2025-10-06 11:17 ` [PATCH 14/20] drm/xe: Implement SR-IOV and eudebug exclusivity Mika Kuoppala
2025-10-06 11:17 ` [PATCH 15/20] drm/xe: Add xe_client_debugfs and introduce debug_data file Mika Kuoppala
2025-10-06 11:17 ` [PATCH 16/20] drm/xe/eudebug: Mark guc contexts as debuggable Mika Kuoppala
2025-10-06 18:35   ` Matthew Brost [this message]
2025-10-20 12:56     ` Mika Kuoppala
2025-10-20 12:53   ` Mika Kuoppala
2025-11-18 14:48   ` Mika Kuoppala
2025-11-19 21:33     ` Daniele Ceraolo Spurio
2025-10-06 11:17 ` [PATCH 17/20] drm/xe/eudebug: Add read/count/compare helper for eu attention Mika Kuoppala
2025-10-06 11:17 ` [PATCH 18/20] drm/xe/eudebug: Introduce EU pagefault handling interface Mika Kuoppala
2025-10-06 11:17 ` [PATCH 19/20] drm/xe/vm: Support for adding null page VMA to VM on request Mika Kuoppala
2025-10-06 11:17 ` [PATCH 20/20] drm/xe/eudebug: Enable EU pagefault handling Mika Kuoppala
2025-10-06 18:43   ` Matthew Brost
2025-10-06 12:30 ` ✗ CI.checkpatch: warning for Intel Xe GPU Debug Support (eudebug) v5 Patchwork
2025-10-06 12:31 ` ✓ CI.KUnit: success " Patchwork
2025-10-06 13:14 ` ✓ Xe.CI.BAT: " Patchwork
2025-10-06 15:53 ` ✗ Xe.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aOQL/Qz32Jwkh6J5@lstrano-desk.jf.intel.com \
    --to=matthew.brost@intel.com \
    --cc=andrzej.hajda@intel.com \
    --cc=christian.koenig@amd.com \
    --cc=christoph.manszewski@intel.com \
    --cc=dominik.grzegorzek@intel.com \
    --cc=gwan-gyeong.mun@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=joonas.lahtinen@linux.intel.com \
    --cc=lucas.demarchi@intel.com \
    --cc=maciej.patelczyk@intel.com \
    --cc=matthew.auld@intel.com \
    --cc=mika.kuoppala@linux.intel.com \
    --cc=rodrigo.vivi@intel.com \
    --cc=simona.vetter@ffwll.ch \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox