From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org
Subject: [PATCH] drm/xe: Convert GT stats to per-cpu counters
Date: Tue, 17 Feb 2026 12:05:52 -0800 [thread overview]
Message-ID: <20260217200552.596718-1-matthew.brost@intel.com> (raw)
Current GT statistics use atomic64_t counters. Atomic operations incur
a global coherency penalty.
Transition to dynamic per-cpu counters using alloc_percpu(). This allows
stats to be incremented via this_cpu_add(), which compiles to a single
non-locking instruction. This approach keeps the hot-path updates local
to the CPU, avoiding expensive cross-core cache invalidation traffic.
Use for_each_possible_cpu() during aggregation and clear operations to
ensure data consistency across CPU hotplug events.
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_gt.c | 5 +++
drivers/gpu/drm/xe/xe_gt_stats.c | 57 +++++++++++++++++++++-----
drivers/gpu/drm/xe/xe_gt_stats.h | 6 +++
drivers/gpu/drm/xe/xe_gt_stats_types.h | 19 +++++++++
drivers/gpu/drm/xe/xe_gt_types.h | 5 +--
5 files changed, 78 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 68c4771de040..1203d087b68f 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -33,6 +33,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_vf.h"
+#include "xe_gt_stats.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_topology.h"
#include "xe_guc_exec_queue_types.h"
@@ -455,6 +456,10 @@ int xe_gt_init_early(struct xe_gt *gt)
if (err)
return err;
+ err = xe_gt_stats_init(gt);
+ if (err)
+ return err;
+
CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
if (!fw_ref.domains)
return -ETIMEDOUT;
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
index 37506434d7a3..f4bb0bf33995 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.c
+++ b/drivers/gpu/drm/xe/xe_gt_stats.c
@@ -3,12 +3,37 @@
* Copyright © 2024 Intel Corporation
*/
-#include <linux/atomic.h>
-
+#include <drm/drm_managed.h>
#include <drm/drm_print.h>
+#include "xe_device.h"
#include "xe_gt_stats.h"
-#include "xe_gt_types.h"
+
+static void xe_gt_stats_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_gt *gt = arg;
+
+ free_percpu(gt->stats);
+}
+
+/**
+ * xe_gt_stats_init() - Initialize GT statistics
+ * @gt: GT structure
+ *
+ * Allocate per-CPU GT statistics. Using per-CPU stats allows increments
+ * to occur without cross-CPU atomics.
+ *
+ * Return: 0 on success, -ENOMEM on failure.
+ */
+int xe_gt_stats_init(struct xe_gt *gt)
+{
+ gt->stats = alloc_percpu(struct xe_gt_stats);
+ if (!gt->stats)
+ return -ENOMEM;
+
+ return drmm_add_action_or_reset(>_to_xe(gt)->drm, xe_gt_stats_fini,
+ gt);
+}
/**
* xe_gt_stats_incr - Increments the specified stats counter
@@ -23,7 +48,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
if (id >= __XE_GT_STATS_NUM_IDS)
return;
- atomic64_add(incr, >->stats.counters[id]);
+ this_cpu_add(gt->stats->counters[id], incr);
}
#define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name
@@ -94,9 +119,18 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p)
{
enum xe_gt_stats_id id;
- for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id)
- drm_printf(p, "%s: %lld\n", stat_description[id],
- atomic64_read(>->stats.counters[id]));
+ for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) {
+ u64 total = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu);
+
+ total += s->counters[id];
+ }
+
+ drm_printf(p, "%s: %lld\n", stat_description[id], total);
+ }
return 0;
}
@@ -109,8 +143,11 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p)
*/
void xe_gt_stats_clear(struct xe_gt *gt)
{
- int id;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu);
- for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id)
- atomic64_set(>->stats.counters[id], 0);
+ memset(s, 0, sizeof(*s));
+ }
}
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h
index 59a7bf60e242..3d0defab9b30 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats.h
@@ -14,10 +14,16 @@ struct xe_gt;
struct drm_printer;
#ifdef CONFIG_DEBUG_FS
+int xe_gt_stats_init(struct xe_gt *gt);
int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_stats_clear(struct xe_gt *gt);
void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr);
#else
+static inline int xe_gt_stats_init(struct xe_gt *gt)
+{
+ return 0;
+}
+
static inline void
xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id,
int incr)
diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
index b8accdbc54eb..79568591bd67 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
@@ -6,6 +6,8 @@
#ifndef _XE_GT_STATS_TYPES_H_
#define _XE_GT_STATS_TYPES_H_
+#include <linux/types.h>
+
enum xe_gt_stats_id {
XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT,
XE_GT_STATS_ID_TLB_INVAL,
@@ -58,4 +60,21 @@ enum xe_gt_stats_id {
__XE_GT_STATS_NUM_IDS,
};
+/**
+ * struct xe_gt_stats - Per-CPU GT statistics counters
+ * @counters: Array of 64-bit counters indexed by &enum xe_gt_stats_id
+ *
+ * This structure is used for high-frequency, per-CPU statistics collection
+ * in the Xe driver. By using a per-CPU allocation and ensuring the structure
+ * is cache-line aligned, we avoid the performance-heavy atomics and cache
+ * coherency traffic.
+ *
+ * Updates to these counters should be performed using the this_cpu_add()
+ * macro to ensure they are atomic with respect to local interrupts and
+ * preemption-safe without the overhead of explicit locking.
+ */
+struct xe_gt_stats {
+ u64 counters[__XE_GT_STATS_NUM_IDS];
+} ____cacheline_aligned;
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index caf7e7e78be9..8b55cf25a75f 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -158,10 +158,7 @@ struct xe_gt {
#if IS_ENABLED(CONFIG_DEBUG_FS)
/** @stats: GT stats */
- struct {
- /** @stats.counters: counters for various GT stats */
- atomic64_t counters[__XE_GT_STATS_NUM_IDS];
- } stats;
+ struct xe_gt_stats __percpu *stats;
#endif
/**
--
2.34.1
next reply other threads:[~2026-02-17 20:05 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-17 20:05 Matthew Brost [this message]
2026-02-17 20:11 ` ✓ CI.KUnit: success for drm/xe: Convert GT stats to per-cpu counters Patchwork
2026-02-17 20:55 ` ✓ Xe.CI.BAT: " Patchwork
2026-02-17 23:13 ` [PATCH] " Summers, Stuart
2026-02-17 23:18 ` Matthew Brost
2026-02-17 23:22 ` Summers, Stuart
2026-02-18 0:16 ` ✗ Xe.CI.FULL: failure for " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260217200552.596718-1-matthew.brost@intel.com \
--to=matthew.brost@intel.com \
--cc=intel-xe@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox