Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/xe: Convert GT stats to per-cpu counters
@ 2026-02-17 20:05 Matthew Brost
  2026-02-17 20:11 ` ✓ CI.KUnit: success for " Patchwork
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Matthew Brost @ 2026-02-17 20:05 UTC (permalink / raw)
  To: intel-xe

Current GT statistics use atomic64_t counters. Atomic operations incur
a global coherency penalty.

Transition to dynamic per-cpu counters using alloc_percpu(). This allows
stats to be incremented via this_cpu_add(), which compiles to a single
non-locking instruction. This approach keeps the hot-path updates local
to the CPU, avoiding expensive cross-core cache invalidation traffic.

Use for_each_possible_cpu() during aggregation and clear operations to
ensure data consistency across CPU hotplug events.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c             |  5 +++
 drivers/gpu/drm/xe/xe_gt_stats.c       | 57 +++++++++++++++++++++-----
 drivers/gpu/drm/xe/xe_gt_stats.h       |  6 +++
 drivers/gpu/drm/xe/xe_gt_stats_types.h | 19 +++++++++
 drivers/gpu/drm/xe/xe_gt_types.h       |  5 +--
 5 files changed, 78 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 68c4771de040..1203d087b68f 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -33,6 +33,7 @@
 #include "xe_gt_printk.h"
 #include "xe_gt_sriov_pf.h"
 #include "xe_gt_sriov_vf.h"
+#include "xe_gt_stats.h"
 #include "xe_gt_sysfs.h"
 #include "xe_gt_topology.h"
 #include "xe_guc_exec_queue_types.h"
@@ -455,6 +456,10 @@ int xe_gt_init_early(struct xe_gt *gt)
 	if (err)
 		return err;
 
+	err = xe_gt_stats_init(gt);
+	if (err)
+		return err;
+
 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
 	if (!fw_ref.domains)
 		return -ETIMEDOUT;
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
index 37506434d7a3..f4bb0bf33995 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.c
+++ b/drivers/gpu/drm/xe/xe_gt_stats.c
@@ -3,12 +3,37 @@
  * Copyright © 2024 Intel Corporation
  */
 
-#include <linux/atomic.h>
-
+#include <drm/drm_managed.h>
 #include <drm/drm_print.h>
 
+#include "xe_device.h"
 #include "xe_gt_stats.h"
-#include "xe_gt_types.h"
+
+static void xe_gt_stats_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_gt *gt = arg;
+
+	free_percpu(gt->stats);
+}
+
+/**
+ * xe_gt_stats_init() - Initialize GT statistics
+ * @gt: GT structure
+ *
+ * Allocate per-CPU GT statistics. Using per-CPU stats allows increments
+ * to occur without cross-CPU atomics.
+ *
+ * Return: 0 on success, -ENOMEM on failure.
+ */
+int xe_gt_stats_init(struct xe_gt *gt)
+{
+	gt->stats = alloc_percpu(struct xe_gt_stats);
+	if (!gt->stats)
+		return -ENOMEM;
+
+	return drmm_add_action_or_reset(&gt_to_xe(gt)->drm, xe_gt_stats_fini,
+					gt);
+}
 
 /**
  * xe_gt_stats_incr - Increments the specified stats counter
@@ -23,7 +48,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
 	if (id >= __XE_GT_STATS_NUM_IDS)
 		return;
 
-	atomic64_add(incr, &gt->stats.counters[id]);
+	this_cpu_add(gt->stats->counters[id], incr);
 }
 
 #define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name
@@ -94,9 +119,18 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p)
 {
 	enum xe_gt_stats_id id;
 
-	for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id)
-		drm_printf(p, "%s: %lld\n", stat_description[id],
-			   atomic64_read(&gt->stats.counters[id]));
+	for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) {
+		u64 total = 0;
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu);
+
+			total += s->counters[id];
+		}
+
+		drm_printf(p, "%s: %lld\n", stat_description[id], total);
+	}
 
 	return 0;
 }
@@ -109,8 +143,11 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p)
  */
 void xe_gt_stats_clear(struct xe_gt *gt)
 {
-	int id;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu);
 
-	for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id)
-		atomic64_set(&gt->stats.counters[id], 0);
+		memset(s, 0, sizeof(*s));
+	}
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h
index 59a7bf60e242..3d0defab9b30 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats.h
@@ -14,10 +14,16 @@ struct xe_gt;
 struct drm_printer;
 
 #ifdef CONFIG_DEBUG_FS
+int xe_gt_stats_init(struct xe_gt *gt);
 int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
 void xe_gt_stats_clear(struct xe_gt *gt);
 void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr);
 #else
+static inline int xe_gt_stats_init(struct xe_gt *gt)
+{
+	return 0;
+}
+
 static inline void
 xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id,
 		 int incr)
diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
index b8accdbc54eb..79568591bd67 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
@@ -6,6 +6,8 @@
 #ifndef _XE_GT_STATS_TYPES_H_
 #define _XE_GT_STATS_TYPES_H_
 
+#include <linux/types.h>
+
 enum xe_gt_stats_id {
 	XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT,
 	XE_GT_STATS_ID_TLB_INVAL,
@@ -58,4 +60,21 @@ enum xe_gt_stats_id {
 	__XE_GT_STATS_NUM_IDS,
 };
 
+/**
+ * struct xe_gt_stats - Per-CPU GT statistics counters
+ * @counters: Array of 64-bit counters indexed by &enum xe_gt_stats_id
+ *
+ * This structure is used for high-frequency, per-CPU statistics collection
+ * in the Xe driver. By using a per-CPU allocation and ensuring the structure
+ * is cache-line aligned, we avoid the performance-heavy atomics and cache
+ * coherency traffic.
+ *
+ * Updates to these counters should be performed using the this_cpu_add()
+ * macro to ensure they are atomic with respect to local interrupts and
+ * preemption-safe without the overhead of explicit locking.
+ */
+struct xe_gt_stats {
+	u64 counters[__XE_GT_STATS_NUM_IDS];
+} ____cacheline_aligned;
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index caf7e7e78be9..8b55cf25a75f 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -158,10 +158,7 @@ struct xe_gt {
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	/** @stats: GT stats */
-	struct {
-		/** @stats.counters: counters for various GT stats */
-		atomic64_t counters[__XE_GT_STATS_NUM_IDS];
-	} stats;
+	struct xe_gt_stats __percpu *stats;
 #endif
 
 	/**
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-02-18  0:16 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-17 20:05 [PATCH] drm/xe: Convert GT stats to per-cpu counters Matthew Brost
2026-02-17 20:11 ` ✓ CI.KUnit: success for " Patchwork
2026-02-17 20:55 ` ✓ Xe.CI.BAT: " Patchwork
2026-02-17 23:13 ` [PATCH] " Summers, Stuart
2026-02-17 23:18   ` Matthew Brost
2026-02-17 23:22     ` Summers, Stuart
2026-02-18  0:16 ` ✗ Xe.CI.FULL: failure for " Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox