All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jani Nikula <jani.nikula@linux.intel.com>
To: Ashutosh Dixit <ashutosh.dixit@intel.com>,
	intel-gfx@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org, Rodrigo Vivi <rodrigo.vivi@intel.com>
Subject: Re: [Intel-gfx] [PATCH v2] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
Date: Tue, 14 Mar 2023 11:35:07 +0200	[thread overview]
Message-ID: <87fsa77k1g.fsf@intel.com> (raw)
In-Reply-To: <20230313234936.2005565-1-ashutosh.dixit@intel.com>

On Mon, 13 Mar 2023, Ashutosh Dixit <ashutosh.dixit@intel.com> wrote:
> On dGfx, the PL1 power limit being enabled and set to a low value results
> in a low GPU operating freq. It also negates the freq raise operation which
> is done before GuC firmware load. As a result GuC firmware load can time
> out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
> limit was enabled and set to a low value). Therefore disable the PL1 power
> limit when allowed by HW when loading GuC firmware.
>
> v2:
>  - Take mutex (to disallow writes to power1_max) across GuC reset/fw load
>  - Add hwm_power_max_restore to error return code path
>
> Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_uc.c | 10 ++++++-
>  drivers/gpu/drm/i915/i915_hwmon.c     | 39 +++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_hwmon.h     |  7 +++++
>  3 files changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 4ccb4be4c9cb..15f8e94edc61 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -18,6 +18,7 @@
>  #include "intel_uc.h"
>  
>  #include "i915_drv.h"
> +#include "i915_hwmon.h"
>  
>  static const struct intel_uc_ops uc_ops_off;
>  static const struct intel_uc_ops uc_ops_on;
> @@ -460,7 +461,7 @@ static int __uc_init_hw(struct intel_uc *uc)
>  	struct drm_i915_private *i915 = gt->i915;
>  	struct intel_guc *guc = &uc->guc;
>  	struct intel_huc *huc = &uc->huc;
> -	int ret, attempts;
> +	int ret, attempts, pl1en;
>  
>  	GEM_BUG_ON(!intel_uc_supports_guc(uc));
>  	GEM_BUG_ON(!intel_uc_wants_guc(uc));
> @@ -491,6 +492,9 @@ static int __uc_init_hw(struct intel_uc *uc)
>  	else
>  		attempts = 1;
>  
> +	/* Disable PL1 limit before raising freq */

That's just duplicating what the code says; a few words on the why might
be helpful.

> +	hwm_power_max_disable(gt, &pl1en);
> +
>  	intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
>  
>  	while (attempts--) {
> @@ -547,6 +551,8 @@ static int __uc_init_hw(struct intel_uc *uc)
>  		intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
>  	}
>  
> +	hwm_power_max_restore(gt, pl1en); /* Restore PL1 limit */
> +
>  	guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
>  	guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
>  
> @@ -563,6 +569,8 @@ static int __uc_init_hw(struct intel_uc *uc)
>  	/* Return GT back to RPn */
>  	intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
>  
> +	hwm_power_max_restore(gt, pl1en); /* Restore PL1 limit */

Ditto about code and comment duplicating the same thing.

Also, we don't use end of the line comments very much. 

> +
>  	__uc_sanitize(uc);
>  
>  	if (!ret) {
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> index ee63a8fd88fc..2bbca75ac477 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val)
>  	}
>  }
>  
> +void hwm_power_max_disable(struct intel_gt *gt, u32 *old)
> +{
> +	struct i915_hwmon *hwmon = gt->i915->hwmon;
> +	intel_wakeref_t wakeref;
> +	u32 r;
> +
> +	if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
> +		return;
> +
> +	/* Take mutex to prevent concurrent hwm_power_max_write */
> +	mutex_lock(&hwmon->hwmon_lock);
> +
> +	with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
> +		r = intel_uncore_rmw(hwmon->ddat.uncore,
> +				     hwmon->rg.pkg_rapl_limit,
> +				     PKG_PWR_LIM_1_EN, 0);
> +
> +	*old = !!(r & PKG_PWR_LIM_1_EN);

If you only need a bool, why do you use a u32?

> +
> +	/* hwmon_lock mutex is unlocked in hwm_power_max_restore */

Not too happy about that... any better ideas?

At the very minimum the functions need locking annotations. Otherwise
we'll get an influx of static analyser complaints.

> +}
> +
> +void hwm_power_max_restore(struct intel_gt *gt, u32 old)
> +{
> +	struct i915_hwmon *hwmon = gt->i915->hwmon;
> +	intel_wakeref_t wakeref;
> +
> +	if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
> +		return;
> +
> +	with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
> +		intel_uncore_rmw(hwmon->ddat.uncore,
> +				 hwmon->rg.pkg_rapl_limit,
> +				 PKG_PWR_LIM_1_EN,
> +				 old ? PKG_PWR_LIM_1_EN : 0);
> +
> +	mutex_unlock(&hwmon->hwmon_lock);
> +}
> +
>  static umode_t
>  hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h
> index 7ca9cf2c34c9..0c2db11be2e2 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.h
> +++ b/drivers/gpu/drm/i915/i915_hwmon.h
> @@ -7,14 +7,21 @@
>  #ifndef __I915_HWMON_H__
>  #define __I915_HWMON_H__
>  
> +#include <linux/types.h>
> +
>  struct drm_i915_private;
> +struct intel_gt;
>  
>  #if IS_REACHABLE(CONFIG_HWMON)
>  void i915_hwmon_register(struct drm_i915_private *i915);
>  void i915_hwmon_unregister(struct drm_i915_private *i915);
> +void hwm_power_max_disable(struct intel_gt *gt, u32 *old);
> +void hwm_power_max_restore(struct intel_gt *gt, u32 old);

Naming should be i915_hwmon_ prefixed following the usual conventions.

Why is the variable intel_gt instead of i915? There's nothing gt
specific in the added code?

>  #else
>  static inline void i915_hwmon_register(struct drm_i915_private *i915) { };
>  static inline void i915_hwmon_unregister(struct drm_i915_private *i915) { };
> +void hwm_power_max_disable(struct intel_gt *gt, u32 *old) { };
> +void hwm_power_max_restore(struct intel_gt *gt, u32 old) { };

These need to be static inline.

BR,
Jani.

>  #endif
>  
>  #endif /* __I915_HWMON_H__ */

-- 
Jani Nikula, Intel Open Source Graphics Center

WARNING: multiple messages have this Message-ID (diff)
From: Jani Nikula <jani.nikula@linux.intel.com>
To: Ashutosh Dixit <ashutosh.dixit@intel.com>,
	intel-gfx@lists.freedesktop.org
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>,
	dri-devel@lists.freedesktop.org,
	Badal Nilawar <badal.nilawar@intel.com>,
	John Harrison <john.c.harrison@intel.com>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>
Subject: Re: [PATCH v2] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
Date: Tue, 14 Mar 2023 11:35:07 +0200	[thread overview]
Message-ID: <87fsa77k1g.fsf@intel.com> (raw)
In-Reply-To: <20230313234936.2005565-1-ashutosh.dixit@intel.com>

On Mon, 13 Mar 2023, Ashutosh Dixit <ashutosh.dixit@intel.com> wrote:
> On dGfx, the PL1 power limit being enabled and set to a low value results
> in a low GPU operating freq. It also negates the freq raise operation which
> is done before GuC firmware load. As a result GuC firmware load can time
> out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
> limit was enabled and set to a low value). Therefore disable the PL1 power
> limit when allowed by HW when loading GuC firmware.
>
> v2:
>  - Take mutex (to disallow writes to power1_max) across GuC reset/fw load
>  - Add hwm_power_max_restore to error return code path
>
> Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_uc.c | 10 ++++++-
>  drivers/gpu/drm/i915/i915_hwmon.c     | 39 +++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_hwmon.h     |  7 +++++
>  3 files changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 4ccb4be4c9cb..15f8e94edc61 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -18,6 +18,7 @@
>  #include "intel_uc.h"
>  
>  #include "i915_drv.h"
> +#include "i915_hwmon.h"
>  
>  static const struct intel_uc_ops uc_ops_off;
>  static const struct intel_uc_ops uc_ops_on;
> @@ -460,7 +461,7 @@ static int __uc_init_hw(struct intel_uc *uc)
>  	struct drm_i915_private *i915 = gt->i915;
>  	struct intel_guc *guc = &uc->guc;
>  	struct intel_huc *huc = &uc->huc;
> -	int ret, attempts;
> +	int ret, attempts, pl1en;
>  
>  	GEM_BUG_ON(!intel_uc_supports_guc(uc));
>  	GEM_BUG_ON(!intel_uc_wants_guc(uc));
> @@ -491,6 +492,9 @@ static int __uc_init_hw(struct intel_uc *uc)
>  	else
>  		attempts = 1;
>  
> +	/* Disable PL1 limit before raising freq */

That's just duplicating what the code says; a few words on the why might
be helpful.

> +	hwm_power_max_disable(gt, &pl1en);
> +
>  	intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
>  
>  	while (attempts--) {
> @@ -547,6 +551,8 @@ static int __uc_init_hw(struct intel_uc *uc)
>  		intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
>  	}
>  
> +	hwm_power_max_restore(gt, pl1en); /* Restore PL1 limit */
> +
>  	guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
>  	guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
>  
> @@ -563,6 +569,8 @@ static int __uc_init_hw(struct intel_uc *uc)
>  	/* Return GT back to RPn */
>  	intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
>  
> +	hwm_power_max_restore(gt, pl1en); /* Restore PL1 limit */

Ditto about code and comment duplicating the same thing.

Also, we don't use end of the line comments very much. 

> +
>  	__uc_sanitize(uc);
>  
>  	if (!ret) {
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> index ee63a8fd88fc..2bbca75ac477 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val)
>  	}
>  }
>  
> +void hwm_power_max_disable(struct intel_gt *gt, u32 *old)
> +{
> +	struct i915_hwmon *hwmon = gt->i915->hwmon;
> +	intel_wakeref_t wakeref;
> +	u32 r;
> +
> +	if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
> +		return;
> +
> +	/* Take mutex to prevent concurrent hwm_power_max_write */
> +	mutex_lock(&hwmon->hwmon_lock);
> +
> +	with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
> +		r = intel_uncore_rmw(hwmon->ddat.uncore,
> +				     hwmon->rg.pkg_rapl_limit,
> +				     PKG_PWR_LIM_1_EN, 0);
> +
> +	*old = !!(r & PKG_PWR_LIM_1_EN);

If you only need a bool, why do you use a u32?

> +
> +	/* hwmon_lock mutex is unlocked in hwm_power_max_restore */

Not too happy about that... any better ideas?

At the very minimum the functions need locking annotations. Otherwise
we'll get an influx of static analyser complaints.

> +}
> +
> +void hwm_power_max_restore(struct intel_gt *gt, u32 old)
> +{
> +	struct i915_hwmon *hwmon = gt->i915->hwmon;
> +	intel_wakeref_t wakeref;
> +
> +	if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
> +		return;
> +
> +	with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
> +		intel_uncore_rmw(hwmon->ddat.uncore,
> +				 hwmon->rg.pkg_rapl_limit,
> +				 PKG_PWR_LIM_1_EN,
> +				 old ? PKG_PWR_LIM_1_EN : 0);
> +
> +	mutex_unlock(&hwmon->hwmon_lock);
> +}
> +
>  static umode_t
>  hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h
> index 7ca9cf2c34c9..0c2db11be2e2 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.h
> +++ b/drivers/gpu/drm/i915/i915_hwmon.h
> @@ -7,14 +7,21 @@
>  #ifndef __I915_HWMON_H__
>  #define __I915_HWMON_H__
>  
> +#include <linux/types.h>
> +
>  struct drm_i915_private;
> +struct intel_gt;
>  
>  #if IS_REACHABLE(CONFIG_HWMON)
>  void i915_hwmon_register(struct drm_i915_private *i915);
>  void i915_hwmon_unregister(struct drm_i915_private *i915);
> +void hwm_power_max_disable(struct intel_gt *gt, u32 *old);
> +void hwm_power_max_restore(struct intel_gt *gt, u32 old);

Naming should be i915_hwmon_ prefixed following the usual conventions.

Why is the variable intel_gt instead of i915? There's nothing gt
specific in the added code?

>  #else
>  static inline void i915_hwmon_register(struct drm_i915_private *i915) { };
>  static inline void i915_hwmon_unregister(struct drm_i915_private *i915) { };
> +void hwm_power_max_disable(struct intel_gt *gt, u32 *old) { };
> +void hwm_power_max_restore(struct intel_gt *gt, u32 old) { };

These need to be static inline.

BR,
Jani.

>  #endif
>  
>  #endif /* __I915_HWMON_H__ */

-- 
Jani Nikula, Intel Open Source Graphics Center

  parent reply	other threads:[~2023-03-14  9:35 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-13 23:49 [Intel-gfx] [PATCH v2] drm/i915/guc: Disable PL1 power limit when loading GuC firmware Ashutosh Dixit
2023-03-13 23:49 ` Ashutosh Dixit
2023-03-14  2:54 ` [Intel-gfx] " kernel test robot
2023-03-14  2:54   ` kernel test robot
2023-03-14  2:54   ` kernel test robot
2023-03-14  9:35 ` Jani Nikula [this message]
2023-03-14  9:35   ` Jani Nikula
2023-03-16  4:01   ` [Intel-gfx] " Dixit, Ashutosh
2023-03-16  4:01     ` Dixit, Ashutosh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87fsa77k1g.fsf@intel.com \
    --to=jani.nikula@linux.intel.com \
    --cc=ashutosh.dixit@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=rodrigo.vivi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.