Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
To: Matthew Brost <matthew.brost@intel.com>, intel-xe@lists.freedesktop.org
Subject: Re: [PATCH 1/4] drm/xe/guc: Add more GuC CT states
Date: Sat, 30 Dec 2023 00:36:30 +0100	[thread overview]
Message-ID: <6c1429e3-5b0d-4aa9-b244-354dc9b780ac@intel.com> (raw)
In-Reply-To: <20231229043507.1002411-2-matthew.brost@intel.com>



On 29.12.2023 05:35, Matthew Brost wrote:
> The Guc CT has more than enabled / disables states rather it has 4. The
> 4 states are not initialized, disabled, drop messages, and enabled.

"drop messages" sounds strange as a state name; maybe "stopped"?

> Change the code to reflect this. These states will enable proper return
> codes from functions and therefore enable proper error messages.
> 
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Tejas Upadhyay <tejas.upadhyay@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_guc.c          |  4 +-
>  drivers/gpu/drm/xe/xe_guc_ct.c       | 55 ++++++++++++++++++++--------
>  drivers/gpu/drm/xe/xe_guc_ct.h       |  8 +++-
>  drivers/gpu/drm/xe/xe_guc_ct_types.h | 18 ++++++++-
>  4 files changed, 64 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> index 811e8b201270..088f7b01d761 100644
> --- a/drivers/gpu/drm/xe/xe_guc.c
> +++ b/drivers/gpu/drm/xe/xe_guc.c
> @@ -651,7 +651,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
>  
>  	BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT);
>  
> -	xe_assert(xe, !guc->ct.enabled);
> +	xe_assert(xe, !xe_guc_ct_enabled(&guc->ct));
>  	xe_assert(xe, len);
>  	xe_assert(xe, len <= VF_SW_FLAG_COUNT);
>  	xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT);
> @@ -833,7 +833,7 @@ int xe_guc_stop(struct xe_guc *guc)
>  {
>  	int ret;
>  
> -	xe_guc_ct_disable(&guc->ct);
> +	xe_guc_ct_drop_messages(&guc->ct);
>  
>  	ret = xe_guc_submit_stop(guc);
>  	if (ret)
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 4cde93c18a2d..8c91d189d859 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -279,12 +279,25 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
>  	return ret > 0 ? -EPROTO : ret;
>  }
>  
> +static void xe_guc_ct_set_state(struct xe_guc_ct *ct,

nit: for static functions this "xe_" prefix is not needed

> +				enum xe_guc_ct_state state)
> +{
> +	mutex_lock(&ct->lock);		/* Serialise dequeue_one_g2h() */
> +	spin_lock_irq(&ct->fast_lock);	/* Serialise CT fast-path */

maybe instead of adding ad-hoc comments we should add more thorough
documentation for lock/fast_lock usage and dependencies?

> +
> +	ct->g2h_outstanding = 0;
> +	ct->state = state;
> +
> +	spin_unlock_irq(&ct->fast_lock);
> +	mutex_unlock(&ct->lock);
> +}
> +
>  int xe_guc_ct_enable(struct xe_guc_ct *ct)
>  {
>  	struct xe_device *xe = ct_to_xe(ct);
>  	int err;
>  
> -	xe_assert(xe, !ct->enabled);
> +	xe_assert(xe, !xe_guc_ct_enabled(ct));
>  
>  	guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
>  	guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
> @@ -301,12 +314,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
>  	if (err)
>  		goto err_out;
>  
> -	mutex_lock(&ct->lock);
> -	spin_lock_irq(&ct->fast_lock);
> -	ct->g2h_outstanding = 0;
> -	ct->enabled = true;
> -	spin_unlock_irq(&ct->fast_lock);
> -	mutex_unlock(&ct->lock);
> +	xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED);
>  
>  	smp_mb();
>  	wake_up_all(&ct->wq);
> @@ -322,12 +330,12 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
>  
>  void xe_guc_ct_disable(struct xe_guc_ct *ct)
>  {
> -	mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
> -	spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
> -	ct->enabled = false; /* Finally disable CT communication */
> -	spin_unlock_irq(&ct->fast_lock);
> -	mutex_unlock(&ct->lock);
> +	xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED);
> +}
>  
> +void xe_guc_ct_drop_messages(struct xe_guc_ct *ct)
> +{
> +	xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DROP_MESSAGES);
>  	xa_destroy(&ct->fence_lookup);
>  }
>  
> @@ -514,11 +522,19 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
>  		goto out;
>  	}
>  
> -	if (unlikely(!ct->enabled)) {
> +	if (ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED ||
> +	    ct->state == XE_GUC_CT_STATE_DISABLED) {
>  		ret = -ENODEV;
>  		goto out;
>  	}
>  
> +	if (ct->state == XE_GUC_CT_STATE_DROP_MESSAGES) {
> +		ret = -ECANCELED;
> +		goto out;
> +	}
> +
> +	xe_assert(xe, xe_guc_ct_enabled(ct));
> +
>  	if (g2h_fence) {
>  		g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
>  		num_g2h = 1;
> @@ -706,7 +722,8 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
>  		return false;
>  
>  #define ct_alive(ct)	\
> -	(ct->enabled && !ct->ctbs.h2g.info.broken && !ct->ctbs.g2h.info.broken)
> +	(xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
> +	 !ct->ctbs.g2h.info.broken)
>  	if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct),  HZ * 5))
>  		return false;
>  #undef ct_alive
> @@ -987,12 +1004,18 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
>  
>  	lockdep_assert_held(&ct->fast_lock);
>  
> -	if (!ct->enabled)
> +	if (ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED ||
> +	    ct->state == XE_GUC_CT_STATE_DISABLED)
>  		return -ENODEV;
>  
> +	if (ct->state == XE_GUC_CT_STATE_DROP_MESSAGES)
> +		return -ECANCELED;
> +
>  	if (g2h->info.broken)
>  		return -EPIPE;
>  
> +	xe_assert(xe, xe_guc_ct_enabled(ct));
> +
>  	/* Calculate DW available to read */
>  	tail = desc_read(xe, g2h, tail);
>  	avail = tail - g2h->info.head;
> @@ -1291,7 +1314,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
>  		return NULL;
>  	}
>  
> -	if (ct->enabled) {
> +	if (xe_guc_ct_enabled(ct)) {
>  		snapshot->ct_enabled = true;
>  		snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
>  		guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
> index f15f8a4857e0..214a6a357519 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.h
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.h
> @@ -13,6 +13,7 @@ struct drm_printer;
>  int xe_guc_ct_init(struct xe_guc_ct *ct);
>  int xe_guc_ct_enable(struct xe_guc_ct *ct);
>  void xe_guc_ct_disable(struct xe_guc_ct *ct);
> +void xe_guc_ct_drop_messages(struct xe_guc_ct *ct);
>  void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
>  
>  struct xe_guc_ct_snapshot *
> @@ -22,10 +23,15 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
>  void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
>  void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
>  
> +static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
> +{
> +	return ct->state == XE_GUC_CT_STATE_ENABLED;
> +}
> +
>  static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
>  {
>  	wake_up_all(&ct->wq);
> -	if (ct->enabled)
> +	if (xe_guc_ct_enabled(ct))
>  		queue_work(system_unbound_wq, &ct->g2h_worker);
>  	xe_guc_ct_fast_path(ct);

if we are not enabled, shouldn't we treat all G2H messages in the same
way? why do we continue with fast_path here?

>  }
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
> index d814d4ee3fc6..f74d38c8f9df 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
> @@ -72,6 +72,20 @@ struct xe_guc_ct_snapshot {
>  	struct guc_ctb_snapshot h2g;
>  };
>  
> +/**
> + * enum xe_guc_ct_state - CT state
> + * @XE_GUC_CT_STATE_NOT_INITIALIZED: CT suspended, messages not expected in this state

"suspended" seems wrong.

IIUC this is a default implicit state where CT was not yet initialized,
not a state where CT was _temporarily_ suspended.

> + * @XE_GUC_CT_STATE_DISABLED: CT disabled, messages not expected in this state
> + * @XE_GUC_CT_STATE_DROP_MESSAGES: CT drops messages without errors

maybe I'm missing something, but shouldn't we just stop processing any
incoming messages and reject sending new ones in this state?

"drops messages" suggests that we are reading G2H CTB *and* then
dropping them without proper processing

> + * @XE_GUC_CT_STATE_ENABLED: CT enabled, messages sent / recieved in this state

typo: "recieved" -> "received"

> + */
> +enum xe_guc_ct_state {
> +	XE_GUC_CT_STATE_NOT_INITIALIZED = 0,

hmm, maybe we don't need this state at all, since CT initialization
failure is fatal and we will abort driver probe

maybe all we need is to explicitly set XE_GUC_CT_STATE_DISABLED(1) in
xe_guc_ct_init() and use xe_assert(ct.state) to catch missing
initialization ?

> +	XE_GUC_CT_STATE_DISABLED,
> +	XE_GUC_CT_STATE_DROP_MESSAGES,
> +	XE_GUC_CT_STATE_ENABLED,
> +};
> +
>  /**
>   * struct xe_guc_ct - GuC command transport (CT) layer
>   *
> @@ -96,8 +110,8 @@ struct xe_guc_ct {
>  	u32 g2h_outstanding;
>  	/** @g2h_worker: worker to process G2H messages */
>  	struct work_struct g2h_worker;
> -	/** @enabled: CT enabled */
> -	bool enabled;
> +	/** @state: CT state */
> +	enum xe_guc_ct_state state;
>  	/** @fence_seqno: G2H fence seqno - 16 bits used by CT */
>  	u32 fence_seqno;
>  	/** @fence_lookup: G2H fence lookup */

  reply	other threads:[~2023-12-29 23:36 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-29  4:35 [PATCH 0/4] GuC CT tweaks Matthew Brost
2023-12-29  4:35 ` [PATCH 1/4] drm/xe/guc: Add more GuC CT states Matthew Brost
2023-12-29 23:36   ` Michal Wajdeczko [this message]
2024-01-02 16:52     ` Matthew Brost
2023-12-29  4:35 ` [PATCH 2/4] drm/xe: Move TLB invalidation reset before HW reset Matthew Brost
2023-12-29  4:35 ` [PATCH 3/4] drm/xe/guc: Protect queuing of G2H handler with fast lock Matthew Brost
2023-12-29  4:35 ` [PATCH 4/4] drm/xe/guc: Flush G2H handler when turning off CTs Matthew Brost
2024-01-04  4:03 ` ✓ CI.Patch_applied: success for GuC CT tweaks Patchwork
2024-01-04  4:03 ` ✗ CI.checkpatch: warning " Patchwork
2024-01-04  4:04 ` ✓ CI.KUnit: success " Patchwork
2024-01-04  4:11 ` ✓ CI.Build: " Patchwork
2024-01-04  4:12 ` ✓ CI.Hooks: " Patchwork
2024-01-04  4:13 ` ✓ CI.checksparse: " Patchwork
2024-01-04  4:48 ` ✓ CI.BAT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6c1429e3-5b0d-4aa9-b244-354dc9b780ac@intel.com \
    --to=michal.wajdeczko@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox