From: Matthew Brost <matthew.brost@intel.com>
To: Aakash Deep Sarkar <aakash.deep.sarkar@intel.com>
Cc: <intel-xe@lists.freedesktop.org>, <jeevaka.badrappan@intel.com>,
<rodrigo.vivi@intel.com>, <carlos.santa@intel.com>,
<matthew.auld@intel.com>, <jani.nikula@intel.com>,
<ashutosh.dixit@intel.com>
Subject: Re: [PATCH v5 4/8] drm/xe: Handle xe_user creation and removal
Date: Mon, 6 Oct 2025 14:00:51 -0700 [thread overview]
Message-ID: <aOQuAzLb6fcoJq4g@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <aOQrXGKw30BJiL+8@lstrano-desk.jf.intel.com>
On Mon, Oct 06, 2025 at 01:49:32PM -0700, Matthew Brost wrote:
> On Mon, Oct 06, 2025 at 02:20:25PM +0000, Aakash Deep Sarkar wrote:
> > We want our xe user structure to be created when a new
> > user id opens the xe device node and to be destroyed
> > when the final xe file with this uid is closed. In other
> > words the xe_user structure for a uid should remain in
> > scope as long as any process with this uid has an open
> > xe file descriptor.
> >
> > To implement this we maintain an xarray of xe user
> > structures inside our xe device instance. Whenever a new
> > xe file is created via an open call, we check if the
> > calling process' uid is already present in our xarray.
> > If so, we increment the refcount for the associated
> > xe user and add this xe file to the list of xe files
> > belonging to this xe user. Otherwise, we allocate a
> > new xe user structure for this uid and initialize its
> > file list with this xe file.
> >
> > Whenever an xe file is destroyed, we decrement the
> > refcount of the associated xe user. When the last
> > xe file in the xe user's file list is destroyed,
> > the xe user refcount should drop to zero and the
> > xe user should be cleaned up. During the cleanup path
> > we remove the xarray entry for this xe user in our
> > xe device and free up its memory.
> >
> > Signed-off-by: Aakash Deep Sarkar <aakash.deep.sarkar@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_device.c | 21 ++++++++
> > drivers/gpu/drm/xe/xe_device_types.h | 16 ++++++
> > drivers/gpu/drm/xe/xe_user.c | 77 +++++++++++++++++++++++++++-
> > drivers/gpu/drm/xe/xe_user.h | 11 +++-
> > 4 files changed, 123 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> > index 386940323630..5a084fd39876 100644
> > --- a/drivers/gpu/drm/xe/xe_device.c
> > +++ b/drivers/gpu/drm/xe/xe_device.c
> > @@ -65,6 +65,7 @@
> > #include "xe_tile.h"
> > #include "xe_ttm_stolen_mgr.h"
> > #include "xe_ttm_sys_mgr.h"
> > +#include "xe_user.h"
> > #include "xe_vm.h"
> > #include "xe_vm_madvise.h"
> > #include "xe_vram.h"
> > @@ -82,7 +83,9 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
> > struct xe_drm_client *client;
> > struct xe_file *xef;
> > int ret = -ENOMEM;
> > + int uid = -EINVAL;
> > struct task_struct *task = NULL;
> > + const struct cred *cred = NULL;
> >
> > xef = kzalloc(sizeof(*xef), GFP_KERNEL);
> > if (!xef)
> > @@ -107,8 +110,16 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
> > file->driver_priv = xef;
> > kref_init(&xef->refcount);
> >
> > + INIT_LIST_HEAD(&xef->user_link);
> > +
> > task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
> > if (task) {
> > + cred = get_task_cred(task);
> > + if (cred) {
> > + uid = (unsigned int) cred->euid.val;
> > + xe_user_init(xe, xef, uid);
> > + put_cred(cred);
> > + }
> > xef->process_name = kstrdup(task->comm, GFP_KERNEL);
> > xef->pid = task->pid;
> > put_task_struct(task);
> > @@ -128,6 +139,12 @@ static void xe_file_destroy(struct kref *ref)
> >
> > xe_drm_client_put(xef->client);
> > kfree(xef->process_name);
> > +
> > + mutex_lock(&xef->user->filelist_lock);
> > + list_del(&xef->user_link);
> > + mutex_unlock(&xef->user->filelist_lock);
> > +
> > + xe_user_put(xef->user);
> > kfree(xef);
> > }
> >
> > @@ -467,6 +484,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
> >
> > xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
> >
> > + xa_init_flags(&xe->work_period.users, XA_FLAGS_ALLOC1);
> > +
> > + mutex_init(&xe->work_period.lock);
> > +
> > if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
> > /* Trigger a large asid and an early asid wrap. */
> > u32 asid;
> > diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> > index 54a612787289..4d4e9a63b3fd 100644
> > --- a/drivers/gpu/drm/xe/xe_device_types.h
> > +++ b/drivers/gpu/drm/xe/xe_device_types.h
> > @@ -613,6 +613,16 @@ struct xe_device {
> > atomic_t g2g_test_count;
> > #endif
> >
> > + /**
> > + * @xe_work_period: Support for GPU work period tracepoint
> > + */
> > + struct xe_work_period {
> > + /** @users: list of users that have opened this xe device */
> > + struct xarray users;
> > + /** @lock: lock protecting this structure */
> > + struct mutex lock;
> > + } work_period;
> > +
> > /* private: */
> >
> > #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> > @@ -684,6 +694,12 @@ struct xe_file {
> > /** @active_duration_ns: total run time in ns for this xe file */
> > u64 active_duration_ns;
> >
> > + /** @user: pointer to struct xe_user associated with this xe file */
> > + struct xe_user *user;
> > +
> > + /** @user_link: link into xe_user::filelist */
> > + struct list_head user_link;
> > +
> > /** @client: drm client */
> > struct xe_drm_client *client;
> >
> > diff --git a/drivers/gpu/drm/xe/xe_user.c b/drivers/gpu/drm/xe/xe_user.c
> > index f35e18776300..cb3de75aa497 100644
> > --- a/drivers/gpu/drm/xe/xe_user.c
> > +++ b/drivers/gpu/drm/xe/xe_user.c
> > @@ -3,6 +3,8 @@
> > * Copyright © 2025 Intel Corporation
> > */
> >
> > +#include <drm/drm_drv.h>
> > +
> > #include "xe_user.h"
> >
> >
> > @@ -60,7 +62,7 @@
> > *
> > * Return: pointer to user struct or NULL if can't allocate
> > */
> > -struct xe_user *xe_user_alloc(void)
> > +static struct xe_user *xe_user_alloc(void)
> > {
> > struct xe_user *user;
> >
> > @@ -71,6 +73,7 @@ struct xe_user *xe_user_alloc(void)
> > kref_init(&user->refcount);
> > mutex_init(&user->filelist_lock);
> > INIT_LIST_HEAD(&user->filelist);
> > + INIT_WORK(&user->work, work_period_worker);
> > return user;
> > }
> >
> > @@ -84,6 +87,78 @@ void __xe_user_free(struct kref *kref)
> > {
> > struct xe_user *user =
> > container_of(kref, struct xe_user, refcount);
> > + struct xe_device *xe = user->xe;
> > + void *lookup;
> > +
> > + mutex_lock(&xe->work_period.lock);
>
> You don't need to take work_period.lock here. Xarrays have their own
> locking. work_period.lock should protect the reference upon lookup,
> that's it.
>
Actually I misspoke here. You do need this part with how you have it
coded, but something looks very odd with the work_period.lock / xarray /
ref counting / waiting on workers under work_period.lock scheme.
Let me think on this part for a bit.
Matt
> > + lookup = xa_erase(&xe->work_period.users, user->id);
> > + xe_assert(xe, lookup == user);
> > + mutex_unlock(&xe->work_period.lock);
> >
> > + drm_dev_put(&user->xe->drm);
> > kfree(user);
> > }
> > +
> > +static struct xe_user *xe_user_lookup(struct xe_device *xe, u32 uid)
> > +{
> > + struct xe_user *user = NULL;
> > + unsigned long i;
> > +
> > + mutex_lock(&xe->work_period.lock);
>
> guard(mutex)(&xe->work_period.lock) will work better here.
>
> > + xa_for_each(&xe->work_period.users, i, user) {
> > + if (user->uid == uid) {
> > + xe_user_get(user);
> > + mutex_unlock(&xe->work_period.lock);
> > + return user;
> > + }
> > + }
> > + mutex_unlock(&xe->work_period.lock);
> > +
> > + return NULL;
> > +}
> > +
> > +int xe_user_init(struct xe_device *xe, struct xe_file *xef, unsigned int uid)
> > +{
> > + struct xe_user *user = NULL;
> > + int ret;
> > + u32 idx;
> > + /*
> > + * Check if the calling process/uid has already been registered
> > + * with the xe device during a previous open call. If so then
> > + * take a reference to this xe user and add this xe file to the
> > + * filelist belonging to this xe user
> > + */
> > + user = xe_user_lookup(xe, uid);
> > + if (!user) {
> > + /*
> > + * We couldn't find an existing xe user for the calling process.
> > + * Allocate a new struct xe_user and register it with this xe
> > + * device
> > + */
> > + user = xe_user_alloc();
> > + if (!user)
> > + return -ENOMEM;
> > +
> > +
> > + user->uid = uid;
> > + user->last_timestamp_ns = ktime_get_raw_ns();
> > + user->xe = xe;
> > +
> > + mutex_lock(&xe->work_period.lock);
> > + ret = xa_alloc(&xe->work_period.users, &idx, user, xa_limit_32b, GFP_KERNEL);
>
> You don't need to lock here either.
>
> Matt
>
> > + mutex_unlock(&xe->work_period.lock);
> > +
> > + if (ret < 0)
> > + return ret;
> > +
> > + user->id = idx;
> > + drm_dev_get(&xe->drm);
> > + }
> > +
> > + mutex_lock(&user->filelist_lock);
> > + list_add(&xef->user_link, &user->filelist);
> > + mutex_unlock(&user->filelist_lock);
> > + xef->user = user;
> > +
> > + return 0;
> > +}
> > diff --git a/drivers/gpu/drm/xe/xe_user.h b/drivers/gpu/drm/xe/xe_user.h
> > index 9628cc628a37..341200c55509 100644
> > --- a/drivers/gpu/drm/xe/xe_user.h
> > +++ b/drivers/gpu/drm/xe/xe_user.h
> > @@ -6,6 +6,9 @@
> > #ifndef _XE_USER_H_
> > #define _XE_USER_H_
> >
> > +#include "xe_device.h"
> > +
> > +
> > /**
> > * struct xe_user - xe user structure
> > *
> > @@ -40,6 +43,11 @@ struct xe_user {
> > */
> > struct work_struct work;
> >
> > + /**
> > + * @id: index of this user into the xe device::users xarray
> > + */
> > + u32 id;
> > +
> > /**
> > * @uid: UID of this xe_user
> > */
> > @@ -58,7 +66,8 @@ struct xe_user {
> > u64 last_timestamp_ns;
> > };
> >
> > -struct xe_user *xe_user_alloc(void);
> > +int xe_user_init(struct xe_device *xe, struct xe_file *xef, unsigned int uid);
> > +
> >
> > static inline struct xe_user *
> > xe_user_get(struct xe_user *user)
> > --
> > 2.49.0
> >
next prev parent reply other threads:[~2025-10-06 21:01 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-06 14:20 [PATCH v5 0/8] [ANDROID]: Add GPU work period support for Xe driver Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 1/8] drm/xe: Add a new xe_user structure Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 2/8] drm/xe: Add xe_gt_clock_interval_to_ns function Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 3/8] drm/xe: Modify xe_exec_queue_update_run_ticks Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 4/8] drm/xe: Handle xe_user creation and removal Aakash Deep Sarkar
2025-10-06 20:49 ` Matthew Brost
2025-10-06 21:00 ` Matthew Brost [this message]
2025-10-06 14:20 ` [PATCH v5 5/8] drm/xe: Implement xe_work_period_worker Aakash Deep Sarkar
2025-10-06 21:12 ` Matthew Brost
2025-10-06 21:38 ` Matthew Brost
2025-10-06 14:20 ` [PATCH v5 6/8] drm/xe: Add a Kconfig option for GPU work period Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 7/8] drm/xe: Handle xe_work_period destruction Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 8/8] Hack patch: Do not merge Aakash Deep Sarkar
2025-10-06 15:03 ` ✗ CI.checkpatch: warning for : Add GPU work period support for Xe driver (rev5) Patchwork
2025-10-06 15:04 ` ✓ CI.KUnit: success " Patchwork
2025-10-06 15:58 ` ✗ Xe.CI.BAT: failure " Patchwork
2025-10-06 17:42 ` ✗ Xe.CI.Full: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aOQuAzLb6fcoJq4g@lstrano-desk.jf.intel.com \
--to=matthew.brost@intel.com \
--cc=aakash.deep.sarkar@intel.com \
--cc=ashutosh.dixit@intel.com \
--cc=carlos.santa@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=jani.nikula@intel.com \
--cc=jeevaka.badrappan@intel.com \
--cc=matthew.auld@intel.com \
--cc=rodrigo.vivi@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox