From: Aakash Deep Sarkar <aakash.deep.sarkar@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: jeevaka.badrappan@intel.com, rodrigo.vivi@intel.com,
matthew.brost@intel.com, carlos.santa@intel.com,
matthew.auld@intel.com, jani.nikula@intel.com,
ashutosh.dixit@intel.com,
Aakash Deep Sarkar <aakash.deep.sarkar@intel.com>
Subject: [PATCH v5 5/8] drm/xe: Implement xe_work_period_worker
Date: Mon, 6 Oct 2025 14:20:26 +0000
Message-ID: <20251006142034.674435-6-aakash.deep.sarkar@intel.com>
In-Reply-To: <20251006142034.674435-1-aakash.deep.sarkar@intel.com>

The work of collecting the GPU run time for a given xe_user and
emitting its event is done by the xe_work_period_worker kworker.
When a new xe_user is created, we also schedule a delayed worker
for it, with the execution delay set to 500 ms. After completing
its work, the kworker reschedules itself for the next execution,
and keeps doing so for as long as a reference to the xe_user can
still be taken.
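
In pseudo-C, the rescheduling loop looks roughly like this (a
simplified sketch of the pattern; the actual code below takes the
reference via a lookup in the xe_device work_period.users xarray):

    static void xe_work_period_worker(struct work_struct *work)
    {
            struct xe_user *user = container_of(work, struct xe_user,
                                                delay_work.work);

            /* ... accumulate runtimes and emit the event ... */

            /* Re-arm only while the xe_user is still referenced */
            if (xe_user_get_unless_zero(user))
                    schedule_delayed_work(&user->delay_work,
                                          msecs_to_jiffies(XE_WORK_PERIOD_INTERVAL));
    }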

During each execution cycle, xe_work_period_worker iterates over
all the xe files in xe_user::filelist and accumulates their GPU
runtime into xe_user::active_duration_ns, updating each
xe_file::active_duration_ns along the way. The total runtime for
this uid in the current sampling period is the delta between the
previous and the current xe_user::active_duration_ns.

We also record the current timestamp in xe_user::last_timestamp_ns
at the end of each invocation of xe_work_period_worker. The
sampling period for this uid is the delta between the previous and
the current timestamp.
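
Putting the two deltas together, each invocation effectively
computes the following (sketch mirroring the code in
xe_work_period_worker below):

    /* Sampling window and per-window runtime for this uid */
    start_time      = user->last_timestamp_ns + 1;
    end_time        = ktime_get_raw_ns();
    active_duration = user->active_duration_ns - last_active_duration;
    trace_gpu_work_period(gpuid, uid, start_time, end_time,
                          active_duration);
    user->last_timestamp_ns = end_time;
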
Signed-off-by: Aakash Deep Sarkar <aakash.deep.sarkar@intel.com>
---
drivers/gpu/drm/xe/xe_device.c | 11 +--
drivers/gpu/drm/xe/xe_pm.c | 5 ++
drivers/gpu/drm/xe/xe_user.c | 127 +++++++++++++++++++++++++++++++--
drivers/gpu/drm/xe/xe_user.h | 19 ++++-
4 files changed, 150 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5a084fd39876..54ac71d1265d 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -140,11 +140,12 @@ static void xe_file_destroy(struct kref *ref)
xe_drm_client_put(xef->client);
kfree(xef->process_name);
- mutex_lock(&xef->user->filelist_lock);
- list_del(&xef->user_link);
- mutex_unlock(&xef->user->filelist_lock);
-
- xe_user_put(xef->user);
+ if (xef->user) {
+ mutex_lock(&xef->user->lock);
+ list_del(&xef->user_link);
+ mutex_unlock(&xef->user->lock);
+ /* Drop the ref after unlocking: the final put frees the user */
+ xe_user_put(xef->user);
+ }
kfree(xef);
}
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index b7e3094f8acf..c7add2616189 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -26,6 +26,7 @@
#include "xe_pxp.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_trace.h"
+#include "xe_user.h"
#include "xe_vm.h"
#include "xe_wa.h"
@@ -598,6 +599,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
xe_i2c_pm_suspend(xe);
+ xe_user_cancel_workers(xe);
+
xe_rpm_lockmap_release(xe);
xe_pm_write_callback_task(xe, NULL);
return 0;
@@ -650,6 +653,8 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe_i2c_pm_resume(xe, xe->d3cold.allowed);
+ xe_user_resume_workers(xe);
+
xe_irq_resume(xe);
for_each_gt(gt, xe, id)
diff --git a/drivers/gpu/drm/xe/xe_user.c b/drivers/gpu/drm/xe/xe_user.c
index cb3de75aa497..fb54d2659642 100644
--- a/drivers/gpu/drm/xe/xe_user.c
+++ b/drivers/gpu/drm/xe/xe_user.c
@@ -5,8 +5,15 @@
#include <drm/drm_drv.h>
+#include "xe_assert.h"
+#include "xe_device_types.h"
+#include "xe_exec_queue.h"
+#include "xe_pm.h"
#include "xe_user.h"
+#define CREATE_TRACE_POINTS
+#include <trace/gpu_work_period.h>
+
/**
* DOC: Xe User
@@ -50,7 +57,82 @@
*/
+static inline void schedule_next_work(struct xe_device *xe, unsigned int id)
+{
+ struct xe_user *user;
+
+ mutex_lock(&xe->work_period.lock);
+ user = xa_load(&xe->work_period.users, id);
+ if (user && xe_user_get_unless_zero(user)) {
+ /* Drop the ref again if the work was already pending */
+ if (!schedule_delayed_work(&user->delay_work,
+ msecs_to_jiffies(XE_WORK_PERIOD_INTERVAL)))
+ xe_user_put(user);
+ }
+ mutex_unlock(&xe->work_period.lock);
+}
+
+static void xe_work_period_worker(struct work_struct *work)
+{
+ struct xe_user *user = container_of(work, struct xe_user, delay_work.work);
+ struct xe_device *xe = user->xe;
+ struct xe_file *xef;
+ struct xe_exec_queue *q;
+
+ /*
+ * The GPU work period event requires the following parameters
+ *
+ * gpuid: GPU index in case the platform has more than one GPU
+ * uid: user id of the app
+ * start_time: start time for the sampling period in nanosecs
+ * end_time: end time for the sampling period in nanosecs
+ * active_duration: Total runtime in nanosecs for this uid in
+ * the current sampling period.
+ */
+ u32 gpuid = 0, uid = user->uid, id = user->id;
+ u64 start_time, end_time, active_duration;
+ u64 last_active_duration, last_timestamp;
+ unsigned long i;
+
+ mutex_lock(&user->lock);
+
+ /* Save the last recorded active duration and timestamp */
+ last_active_duration = user->active_duration_ns;
+ last_timestamp = user->last_timestamp_ns;
+
+ if (xe_pm_runtime_get_if_active(xe)) {
+
+ list_for_each_entry(xef, &user->filelist, user_link) {
+
+ wait_var_event(&xef->exec_queue.pending_removal,
+ !atomic_read(&xef->exec_queue.pending_removal));
+
+ /* Accumulate all the exec queues from this file */
+ mutex_lock(&xef->exec_queue.lock);
+ xa_for_each(&xef->exec_queue.xa, i, q) {
+ xe_exec_queue_get(q);
+ mutex_unlock(&xef->exec_queue.lock);
+
+ xe_exec_queue_update_run_ticks(q);
+
+ mutex_lock(&xef->exec_queue.lock);
+ xe_exec_queue_put(q);
+ }
+ mutex_unlock(&xef->exec_queue.lock);
+ user->active_duration_ns += xef->active_duration_ns;
+ }
+
+ xe_pm_runtime_put(xe);
+
+ start_time = last_timestamp + 1;
+ end_time = ktime_get_raw_ns();
+ active_duration = user->active_duration_ns - last_active_duration;
+ trace_gpu_work_period(gpuid, uid, start_time, end_time, active_duration);
+ user->last_timestamp_ns = end_time;
+ }
+
+ mutex_unlock(&user->lock);
+
+ /* Drop the ref taken when this execution was scheduled */
+ xe_user_put(user);
+ schedule_next_work(xe, id);
+}
/**
* xe_user_alloc() - Allocate xe user
@@ -71,9 +153,9 @@ static struct xe_user *xe_user_alloc(void)
return NULL;
kref_init(&user->refcount);
- mutex_init(&user->filelist_lock);
+ mutex_init(&user->lock);
INIT_LIST_HEAD(&user->filelist);
- INIT_WORK(&user->work, work_period_worker);
+ INIT_DELAYED_WORK(&user->delay_work, xe_work_period_worker);
return user;
}
@@ -153,12 +235,49 @@ int xe_user_init(struct xe_device *xe, struct xe_file *xef, unsigned int uid)
user->id = idx;
drm_dev_get(&xe->drm);
+
+ xe_user_get(user);
+ if (!schedule_delayed_work(&user->delay_work,
+ msecs_to_jiffies(XE_WORK_PERIOD_INTERVAL)))
+ xe_user_put(user);
}
- mutex_lock(&user->filelist_lock);
+ mutex_lock(&user->lock);
list_add(&xef->user_link, &user->filelist);
- mutex_unlock(&user->filelist_lock);
+ mutex_unlock(&user->lock);
xef->user = user;
return 0;
}
+
+void xe_user_cancel_workers(struct xe_device *xe)
+{
+ struct xe_user *user = NULL;
+ unsigned long i = 0;
+
+ mutex_lock(&xe->work_period.lock);
+ xa_for_each(&xe->work_period.users, i, user) {
+ if (user && xe_user_get_unless_zero(user)) {
+ cancel_delayed_work_sync(&user->delay_work);
+ xe_user_put(user);
+ }
+ }
+ mutex_unlock(&xe->work_period.lock);
+}
+
+void xe_user_resume_workers(struct xe_device *xe)
+{
+ struct xe_user *user = NULL;
+ unsigned long i = 0;
+
+ mutex_lock(&xe->work_period.lock);
+ xa_for_each(&xe->work_period.users, i, user) {
+ if (user && xe_user_get_unless_zero(user)) {
+ if (!schedule_delayed_work(&user->delay_work,
+ msecs_to_jiffies(XE_WORK_PERIOD_INTERVAL)))
+ xe_user_put(user);
+ }
+ }
+ mutex_unlock(&xe->work_period.lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_user.h b/drivers/gpu/drm/xe/xe_user.h
index 341200c55509..55016ba189f1 100644
--- a/drivers/gpu/drm/xe/xe_user.h
+++ b/drivers/gpu/drm/xe/xe_user.h
@@ -9,6 +9,8 @@
#include "xe_device.h"
+#define XE_WORK_PERIOD_INTERVAL 500 /* ms */
+
/**
* struct xe_user - xe user structure
*
@@ -28,9 +30,9 @@ struct xe_user {
struct xe_device *xe;
/**
- * @filelist_lock: lock protecting the filelist
+ * @lock: lock protecting this structure
*/
- struct mutex filelist_lock;
+ struct mutex lock;
/**
* @filelist: list of xe files belonging to this xe user
@@ -41,7 +43,7 @@ struct xe_user {
* @work: work to emit the gpu work period event for this
* xe user
*/
- struct work_struct work;
+ struct delayed_work delay_work;
/**
* @id: index of this user into the xe device::users xarray
@@ -68,6 +70,17 @@ struct xe_user {
int xe_user_init(struct xe_device *xe, struct xe_file *xef, unsigned int uid);
+void xe_user_cancel_workers(struct xe_device *xe);
+
+void xe_user_resume_workers(struct xe_device *xe);
+
+static inline struct xe_user *
+xe_user_get_unless_zero(struct xe_user *user)
+{
+ if (kref_get_unless_zero(&user->refcount))
+ return user;
+ return NULL;
+}
static inline struct xe_user *
xe_user_get(struct xe_user *user)
--
2.49.0