Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
To: Badal Nilawar <badal.nilawar@intel.com>,
	<intel-xe@lists.freedesktop.org>,
	 <dri-devel@lists.freedesktop.org>
Cc: <anshuman.gupta@intel.com>, <rodrigo.vivi@intel.com>,
	<alexander.usyskin@intel.com>, <gregkh@linuxfoundation.org>,
	<jgg@nvidia.com>
Subject: Re: [PATCH v2 05/10] drm/xe/xe_late_bind_fw: Load late binding firmware
Date: Tue, 10 Jun 2025 17:17:24 -0700	[thread overview]
Message-ID: <a51e1b65-dece-4ac4-808d-8749a1a21d8f@intel.com> (raw)
In-Reply-To: <20250606175707.1403384-6-badal.nilawar@intel.com>



On 6/6/2025 10:57 AM, Badal Nilawar wrote:
> Load late binding firmware
>
> v2:
>   - s/EAGAIN/EBUSY/
>   - Flush worker in suspend and driver unload (Daniele)
>
> Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
> ---
>   drivers/gpu/drm/xe/xe_late_bind_fw.c       | 121 ++++++++++++++++++++-
>   drivers/gpu/drm/xe/xe_late_bind_fw.h       |   1 +
>   drivers/gpu/drm/xe/xe_late_bind_fw_types.h |   5 +
>   3 files changed, 126 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c
> index 0231f3dcfc18..7fe304c54374 100644
> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.c
> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
> @@ -16,6 +16,16 @@
>   #include "xe_late_bind_fw.h"
>   #include "xe_pcode.h"
>   #include "xe_pcode_api.h"
> +#include "xe_pm.h"
> +
> +/*
> + * The component should load quite quickly in most cases, but it could take
> + * a bit. Using a very big timeout just to cover the worst case scenario
> + */
> +#define LB_INIT_TIMEOUT_MS 20000
> +
> +#define LB_FW_LOAD_RETRY_MAXCOUNT 40
> +#define LB_FW_LOAD_RETRY_PAUSE_MS 50

Are those retry values spec'd anywhere? For GSC we use those because the 
GSC specs say to retry in 50ms intervals for up to 2 secs to give time 
for the GSC to do proxy handling. Does it make sense to do the same in 
this case, given that there is no proxy involved?

>   
>   static const char * const fw_type_to_name[] = {
>   		[CSC_LATE_BINDING_TYPE_FAN_CONTROL] = "fan_control",
> @@ -39,6 +49,95 @@ static int late_bind_fw_num_fans(struct xe_late_bind *late_bind)
>   		return 0;
>   }
>   
> +static void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind)
> +{
> +	struct xe_device *xe = late_bind_to_xe(late_bind);
> +	struct xe_late_bind_fw *lbfw;
> +
> +	lbfw = &late_bind->late_bind_fw;
> +	if (lbfw->valid && late_bind->wq) {
> +		drm_dbg(&xe->drm, "Flush work: load %s firmware\n",
> +			fw_type_to_name[lbfw->type]);
> +			flush_work(&lbfw->work);
> +	}
> +}
> +
> +static void late_bind_work(struct work_struct *work)
> +{
> +	struct xe_late_bind_fw *lbfw = container_of(work, struct xe_late_bind_fw, work);
> +	struct xe_late_bind *late_bind = container_of(lbfw, struct xe_late_bind,
> +						      late_bind_fw);
> +	struct xe_device *xe = late_bind_to_xe(late_bind);
> +	int retry = LB_FW_LOAD_RETRY_MAXCOUNT;
> +	int ret;
> +	int slept;
> +
> +	if (!late_bind->component_added)
> +		return;
> +
> +	if (!lbfw->valid)
> +		return;

The first check is redundant because lbfw->valid can't be true if 
late_bind->component_added is false with the current code.

> +
> +	/* we can queue this before the component is bound */
> +	for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) {
> +		if (late_bind->component.ops)
> +			break;
> +		msleep(100);
> +	}
> +
> +	xe_pm_runtime_get(xe);
> +	mutex_lock(&late_bind->mutex);
> +
> +	if (!late_bind->component.ops) {
> +		drm_err(&xe->drm, "Late bind component not bound\n");
> +		goto out;
> +	}
> +
> +	drm_dbg(&xe->drm, "Load %s firmware\n", fw_type_to_name[lbfw->type]);
> +
> +	do {
> +		ret = late_bind->component.ops->push_config(late_bind->component.mei_dev,
> +							    lbfw->type, lbfw->flags,
> +							    lbfw->payload, lbfw->payload_size);
> +		if (!ret)
> +			break;
> +		msleep(LB_FW_LOAD_RETRY_PAUSE_MS);
> +	} while (--retry && ret == -EBUSY);
> +
> +	if (ret)
> +		drm_err(&xe->drm, "Load %s firmware failed with err %d\n",
> +			fw_type_to_name[lbfw->type], ret);
> +	else
> +		drm_dbg(&xe->drm, "Load %s firmware successful\n",
> +			fw_type_to_name[lbfw->type]);
> +out:
> +	mutex_unlock(&late_bind->mutex);
> +	xe_pm_runtime_put(xe);
> +}
> +
> +int xe_late_bind_fw_load(struct xe_late_bind *late_bind)
> +{
> +	struct xe_device *xe = late_bind_to_xe(late_bind);
> +	struct xe_late_bind_fw *lbfw;
> +
> +	if (!late_bind->component_added)
> +		return -EINVAL;
> +
> +	lbfw = &late_bind->late_bind_fw;
> +	if (lbfw->valid) {
> +		drm_dbg(&xe->drm, "Queue work: to load %s firmware\n",
> +			fw_type_to_name[lbfw->type]);

This log seems a bit too specific, also given that you also have logs 
inside the work

Daniele

> +			queue_work(late_bind->wq, &lbfw->work);
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * late_bind_fw_init() - initialize late bind firmware
> + *
> + * Return: 0 if the initialization was successful, a negative errno otherwise.
> + */
>   static int late_bind_fw_init(struct xe_late_bind *late_bind, u32 type)
>   {
>   	struct xe_device *xe = late_bind_to_xe(late_bind);
> @@ -87,6 +186,7 @@ static int late_bind_fw_init(struct xe_late_bind *late_bind, u32 type)
>   
>   	memcpy(lb_fw->payload, fw->data, lb_fw->payload_size);
>   	release_firmware(fw);
> +	INIT_WORK(&lb_fw->work, late_bind_work);
>   	lb_fw->valid = true;
>   
>   	return 0;
> @@ -99,7 +199,17 @@ static int late_bind_fw_init(struct xe_late_bind *late_bind, u32 type)
>    */
>   int xe_late_bind_fw_init(struct xe_late_bind *late_bind)
>   {
> -	return late_bind_fw_init(late_bind, CSC_LATE_BINDING_TYPE_FAN_CONTROL);
> +	int ret;
> +
> +	late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0);
> +	if (!late_bind->wq)
> +		return -ENOMEM;
> +
> +	ret = late_bind_fw_init(late_bind, CSC_LATE_BINDING_TYPE_FAN_CONTROL);
> +	if (ret)
> +		return ret;
> +
> +	return xe_late_bind_fw_load(late_bind);
>   }
>   
>   static int xe_late_bind_component_bind(struct device *xe_kdev,
> @@ -122,6 +232,8 @@ static void xe_late_bind_component_unbind(struct device *xe_kdev,
>   	struct xe_device *xe = kdev_to_xe_device(xe_kdev);
>   	struct xe_late_bind *late_bind = &xe->late_bind;
>   
> +	xe_late_bind_wait_for_worker_completion(late_bind);
> +
>   	mutex_lock(&late_bind->mutex);
>   	late_bind->component.ops = NULL;
>   	mutex_unlock(&late_bind->mutex);
> @@ -140,9 +252,16 @@ static void xe_late_bind_remove(void *arg)
>   	if (!late_bind->component_added)
>   		return;
>   
> +	xe_late_bind_wait_for_worker_completion(late_bind);
> +
>   	component_del(xe->drm.dev, &xe_late_bind_component_ops);
>   	late_bind->component_added = false;
> +	if (late_bind->wq) {
> +		destroy_workqueue(late_bind->wq);
> +		late_bind->wq = NULL;
> +	}
>   	mutex_destroy(&late_bind->mutex);
> +
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.h
> index a8b47523b203..166957e84c2f 100644
> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.h
> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
> @@ -12,5 +12,6 @@ struct xe_late_bind;
>   
>   int xe_late_bind_init(struct xe_late_bind *late_bind);
>   int xe_late_bind_fw_init(struct xe_late_bind *late_bind);
> +int xe_late_bind_fw_load(struct xe_late_bind *late_bind);
>   
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
> index c427e141c685..690680e8ff22 100644
> --- a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
> @@ -9,6 +9,7 @@
>   #include <linux/iosys-map.h>
>   #include <linux/mutex.h>
>   #include <linux/types.h>
> +#include <linux/workqueue.h>
>   
>   #define MAX_PAYLOAD_SIZE (1024 * 4)
>   
> @@ -43,6 +44,8 @@ struct xe_late_bind_fw {
>   	u8  payload[MAX_PAYLOAD_SIZE];
>   	/** @late_bind_fw.payload_size: late binding blob payload_size */
>   	size_t payload_size;
> +	/** @late_bind_fw.work: worker to upload latebind blob */
> +	struct work_struct work;
>   };
>   
>   /**
> @@ -71,6 +74,8 @@ struct xe_late_bind {
>   	struct mutex mutex;
>   	/** @late_bind.late_bind_fw: late binding firmware */
>   	struct xe_late_bind_fw late_bind_fw;
> +	/** @late_bind.wq: workqueue to submit request to download late bind blob */
> +	struct workqueue_struct *wq;
>   };
>   
>   #endif


  reply	other threads:[~2025-06-11  0:18 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-06 17:56 [PATCH v2 00/10] Introducing firmware late binding Badal Nilawar
2025-06-06 17:56 ` [PATCH v2 01/10] mei: bus: add mei_cldev_mtu interface Badal Nilawar
2025-06-15 18:19   ` Umesh Nerlige Ramappa
2025-06-06 17:56 ` [PATCH v2 02/10] mei: late_bind: add late binding component driver Badal Nilawar
2025-06-14  8:02   ` Gupta, Anshuman
2025-06-16 15:13     ` Nilawar, Badal
2025-06-06 17:57 ` [PATCH v2 03/10] drm/xe/xe_late_bind_fw: Introducing xe_late_bind_fw Badal Nilawar
2025-06-10 21:47   ` Daniele Ceraolo Spurio
2025-06-14  9:57   ` Gupta, Anshuman
2025-06-06 17:57 ` [PATCH v2 04/10] drm/xe/xe_late_bind_fw: Initialize late binding firmware Badal Nilawar
2025-06-11  0:06   ` Daniele Ceraolo Spurio
2025-06-12 10:35     ` Nilawar, Badal
2025-06-12 15:11       ` Nilawar, Badal
2025-06-06 17:57 ` [PATCH v2 05/10] drm/xe/xe_late_bind_fw: Load " Badal Nilawar
2025-06-11  0:17   ` Daniele Ceraolo Spurio [this message]
2025-06-12 11:54     ` Usyskin, Alexander
2025-06-12 13:26       ` Nilawar, Badal
2025-06-12 13:31     ` Nilawar, Badal
2025-06-15 18:26   ` Umesh Nerlige Ramappa
2025-06-17  9:00     ` Nilawar, Badal
2025-06-06 17:57 ` [PATCH v2 06/10] drm/xe/xe_late_bind_fw: Reload late binding fw in rpm resume Badal Nilawar
2025-06-06 17:57 ` [PATCH v2 07/10] drm/xe/xe_late_bind_fw: Reload late binding fw in S2Idle/S3 resume Badal Nilawar
2025-06-06 17:57 ` [PATCH v2 08/10] drm/xe/xe_late_bind_fw: Introduce debug fs node to disable late binding Badal Nilawar
2025-06-12 21:28   ` Daniele Ceraolo Spurio
2025-06-06 17:57 ` [PATCH v2 09/10] {fwctl,drm}/xe/pcode: Introduce xe_pcode_fwctl Badal Nilawar
2025-06-16 14:42   ` Rodrigo Vivi
2025-06-06 17:57 ` [PATCH v2 10/10] [CI]drm/xe/xe_late_bind_fw: Select INTEL_MEI_LATE_BIND for CI Do not review Badal Nilawar
2025-06-07  8:02   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a51e1b65-dece-4ac4-808d-8749a1a21d8f@intel.com \
    --to=daniele.ceraolospurio@intel.com \
    --cc=alexander.usyskin@intel.com \
    --cc=anshuman.gupta@intel.com \
    --cc=badal.nilawar@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=jgg@nvidia.com \
    --cc=rodrigo.vivi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox