Intel-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Fix conversion between clock ticks and nanoseconds
@ 2025-10-07 23:35 Umesh Nerlige Ramappa
  2025-10-08  4:52 ` ✓ i915.CI.BAT: success for " Patchwork
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Umesh Nerlige Ramappa @ 2025-10-07 23:35 UTC (permalink / raw)
  To: intel-gfx, lucas.demarchi, riana.tauro, andi.shyti; +Cc: matthew.brost

When tick values are large, the product of the multiplication by
NSEC_PER_SEC overflows 64 bits and results in bad conversions.

The issue is seen in PMU busyness counters that look like they have
wrapped around due to bad conversion. i915 PMU implementation returns
monotonically increasing counters. If a count is less than the previous
one, it will only return the larger value until the smaller value
catches up. The user will see this as zero delta between two
measurements even though the engines are busy.

Fix it by using a scaling factor to do the conversion. Add the same fix
for reverse conversion as well.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14955
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
v2:
- Fix divide by zero for Gen11 (Andi)
- Update commit message
---
 .../gpu/drm/i915/gt/intel_gt_clock_utils.c    | 19 ++++++++++++++-----
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |  2 ++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
index 88b147fa5cb1..41a0e8622b33 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -3,6 +3,8 @@
  * Copyright © 2020 Intel Corporation
  */
 
+#include <linux/gcd.h>
+
 #include "i915_drv.h"
 #include "i915_reg.h"
 #include "intel_gt.h"
@@ -171,7 +173,12 @@ static u32 read_clock_frequency(struct intel_uncore *uncore)
 
 void intel_gt_init_clock_frequency(struct intel_gt *gt)
 {
+	unsigned long clock_period_scale;
+
 	gt->clock_frequency = read_clock_frequency(gt->uncore);
+	clock_period_scale = gcd(NSEC_PER_SEC, gt->clock_frequency);
+	gt->clock_nsec_scaled = NSEC_PER_SEC / clock_period_scale;
+	gt->clock_freq_scaled = gt->clock_frequency / clock_period_scale;
 
 	/* Icelake appears to use another fixed frequency for CTX_TIMESTAMP */
 	if (GRAPHICS_VER(gt->i915) == 11)
@@ -180,11 +187,11 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt)
 		gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1);
 
 	GT_TRACE(gt,
-		 "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n",
+		 "Using clock frequency: %dkHz, period: %dns, wrap: %lldms, scale %lu\n",
 		 gt->clock_frequency / 1000,
 		 gt->clock_period_ns,
-		 div_u64(mul_u32_u32(gt->clock_period_ns, S32_MAX),
-			 USEC_PER_SEC));
+		 div_u64(mul_u32_u32(gt->clock_period_ns, S32_MAX), USEC_PER_SEC),
+		 clock_period_scale);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
@@ -205,7 +212,8 @@ static u64 div_u64_roundup(u64 nom, u32 den)
 
 u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count)
 {
-	return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency);
+	return div_u64_roundup(count * gt->clock_nsec_scaled,
+			       gt->clock_freq_scaled);
 }
 
 u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count)
@@ -215,7 +223,8 @@ u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count)
 
 u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns)
 {
-	return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC);
+	return div_u64_roundup(gt->clock_freq_scaled * ns,
+			       gt->clock_nsec_scaled);
 }
 
 u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index bcee084b1f27..a19c568fcdc0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -166,6 +166,8 @@ struct intel_gt {
 
 	u32 clock_frequency;
 	u32 clock_period_ns;
+	u32 clock_freq_scaled;
+	u32 clock_nsec_scaled;
 
 	struct intel_llc llc;
 	struct intel_rc6 rc6;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread
* [PATCH] drm/i915: Fix conversion between clock ticks and nanoseconds
@ 2025-10-09 19:16 Umesh Nerlige Ramappa
  0 siblings, 0 replies; 19+ messages in thread
From: Umesh Nerlige Ramappa @ 2025-10-09 19:16 UTC (permalink / raw)
  To: intel-gfx, andi.shyti
  Cc: lucas.demarchi, riana.tauro, matthew.brost, Ashutosh Dixit

When tick values are large, the product of the multiplication by
NSEC_PER_SEC overflows 64 bits and results in bad conversions.

The issue is seen in PMU busyness counters that look like they have
wrapped around due to bad conversion. i915 PMU implementation returns
monotonically increasing counters. If a count is less than the previous
one, it will only return the larger value until the smaller value
catches up. The user will see this as zero delta between two
measurements even though the engines are busy.

Fix it by using a scaling factor to do the conversion. Add the same fix
for reverse conversion as well.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14955
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
v2:
- Fix divide by zero for Gen11 (Andi)
- Update commit message

v3:
- Warn if gt->clock_frequency is 0 (Andi)
---
 .../gpu/drm/i915/gt/intel_gt_clock_utils.c    | 21 ++++++++++++++-----
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |  2 ++
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
index 88b147fa5cb1..52e47cdcf0ce 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -3,6 +3,8 @@
  * Copyright © 2020 Intel Corporation
  */
 
+#include <linux/gcd.h>
+
 #include "i915_drv.h"
 #include "i915_reg.h"
 #include "intel_gt.h"
@@ -171,7 +173,14 @@ static u32 read_clock_frequency(struct intel_uncore *uncore)
 
 void intel_gt_init_clock_frequency(struct intel_gt *gt)
 {
+	unsigned long clock_period_scale;
+
 	gt->clock_frequency = read_clock_frequency(gt->uncore);
+	GEM_WARN_ON(!gt->clock_frequency);
+
+	clock_period_scale = gcd(NSEC_PER_SEC, gt->clock_frequency);
+	gt->clock_nsec_scaled = NSEC_PER_SEC / clock_period_scale;
+	gt->clock_freq_scaled = gt->clock_frequency / clock_period_scale;
 
 	/* Icelake appears to use another fixed frequency for CTX_TIMESTAMP */
 	if (GRAPHICS_VER(gt->i915) == 11)
@@ -180,11 +189,11 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt)
 		gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1);
 
 	GT_TRACE(gt,
-		 "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n",
+		 "Using clock frequency: %dkHz, period: %dns, wrap: %lldms, scale %lu\n",
 		 gt->clock_frequency / 1000,
 		 gt->clock_period_ns,
-		 div_u64(mul_u32_u32(gt->clock_period_ns, S32_MAX),
-			 USEC_PER_SEC));
+		 div_u64(mul_u32_u32(gt->clock_period_ns, S32_MAX), USEC_PER_SEC),
+		 clock_period_scale);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
@@ -205,7 +214,8 @@ static u64 div_u64_roundup(u64 nom, u32 den)
 
 u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count)
 {
-	return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency);
+	return div_u64_roundup(count * gt->clock_nsec_scaled,
+			       gt->clock_freq_scaled);
 }
 
 u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count)
@@ -215,7 +225,8 @@ u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count)
 
 u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns)
 {
-	return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC);
+	return div_u64_roundup(gt->clock_freq_scaled * ns,
+			       gt->clock_nsec_scaled);
 }
 
 u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index bcee084b1f27..a19c568fcdc0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -166,6 +166,8 @@ struct intel_gt {
 
 	u32 clock_frequency;
 	u32 clock_period_ns;
+	u32 clock_freq_scaled;
+	u32 clock_nsec_scaled;
 
 	struct intel_llc llc;
 	struct intel_rc6 rc6;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread
* [PATCH] drm/i915: Fix conversion between clock ticks and nanoseconds
@ 2025-10-16  0:03 Umesh Nerlige Ramappa
  2025-10-16 20:07 ` Dixit, Ashutosh
  2025-10-29 11:33 ` Tvrtko Ursulin
  0 siblings, 2 replies; 19+ messages in thread
From: Umesh Nerlige Ramappa @ 2025-10-16  0:03 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ashutosh Dixit, andi.shyti

When tick values are large, the product of the multiplication by
NSEC_PER_SEC overflows 64 bits and results in bad conversions.

The issue is seen in PMU busyness counters that look like they have
wrapped around due to bad conversion. i915 PMU implementation returns
monotonically increasing counters. If a count is less than the previous
one, it will only return the larger value until the smaller value
catches up. The user will see this as zero delta between two
measurements even though the engines are busy.

Fix it by using mul_u64_u32_div().

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14955
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
v2:
- Fix divide by zero for Gen11 (Andi)
- Update commit message

v3:
- Drop GCD and use mul_u64_u32_div() instead (Ashutosh)
---
 drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
index 88b147fa5cb1..c90b35881a26 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -205,7 +205,7 @@ static u64 div_u64_roundup(u64 nom, u32 den)
 
 u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count)
 {
-	return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency);
+	return mul_u64_u32_div(count, NSEC_PER_SEC, gt->clock_frequency);
 }
 
 u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count)
@@ -215,7 +215,7 @@ u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count)
 
 u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns)
 {
-	return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC);
+	return mul_u64_u32_div(ns, gt->clock_frequency, NSEC_PER_SEC);
 }
 
 u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2025-11-05 19:38 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2025-10-07 23:35 [PATCH] drm/i915: Fix conversion between clock ticks and nanoseconds Umesh Nerlige Ramappa
2025-10-08  4:52 ` ✓ i915.CI.BAT: success for " Patchwork
2025-10-08 11:13 ` ✗ i915.CI.Full: failure " Patchwork
2025-10-08 18:21   ` Umesh Nerlige Ramappa
2025-10-09 12:24 ` [PATCH] " Andi Shyti
2025-10-10 17:45   ` Umesh Nerlige Ramappa
2025-10-14  8:58     ` Andi Shyti
2025-10-14  1:19 ` Dixit, Ashutosh
2025-10-14 20:31   ` Umesh Nerlige Ramappa
2025-10-14 21:43     ` Dixit, Ashutosh
  -- strict thread matches above, loose matches on Subject: below --
2025-10-09 19:16 Umesh Nerlige Ramappa
2025-10-16  0:03 Umesh Nerlige Ramappa
2025-10-16 20:07 ` Dixit, Ashutosh
2025-10-16 23:36   ` Umesh Nerlige Ramappa
2025-10-17  2:01     ` Rodrigo Vivi
2025-10-29 11:33 ` Tvrtko Ursulin
2025-10-29 22:01   ` Umesh Nerlige Ramappa
2025-10-31  8:40     ` Tvrtko Ursulin
2025-11-05 19:38       ` Rodrigo Vivi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox