From: Matthew Brost <matthew.brost@intel.com>
To: Aakash Deep Sarkar <aakash.deep.sarkar@intel.com>
Cc: <intel-xe@lists.freedesktop.org>, <jeevaka.badrappan@intel.com>,
<rodrigo.vivi@intel.com>, <carlos.santa@intel.com>,
<matthew.auld@intel.com>, <jani.nikula@intel.com>,
<ashutosh.dixit@intel.com>
Subject: Re: [PATCH v5 4/8] drm/xe: Handle xe_user creation and removal
Date: Mon, 6 Oct 2025 14:00:51 -0700 [thread overview]
Message-ID: <aOQuAzLb6fcoJq4g@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <aOQrXGKw30BJiL+8@lstrano-desk.jf.intel.com>
On Mon, Oct 06, 2025 at 01:49:32PM -0700, Matthew Brost wrote:
> On Mon, Oct 06, 2025 at 02:20:25PM +0000, Aakash Deep Sarkar wrote:
> > We want our xe user structure to be created when a new
> > user id opens the xe device node and to be destroyed
> > when the final xe file with this uid is closed. In other
> > words the xe_user structure for a uid should remain in
> > scope as long as any process with this uid has an open
> > xe file descriptor.
> >
> > To implement this we maintain an xarray of xe user
> > structures inside our xe device instance. Whenever a new
> > xe file is created via an open call, we check if the
> > calling process' uid is already present in our xarray.
> > If so, we increment the refcount for the associated
> > xe user and add this xe file to the list of xe files
> > belonging to this xe user. Otherwise, we allocate a
> > new xe user structure for this uid and initialize its
> > file list with this xe file.
> >
> > Whenever an xe file is destroyed, we decrement the
> > refcount of the associated xe user. When the last
> > xe file in the xe user's file list is destroyed,
> > the xe user refcount should drop to zero and the
> > xe user should be cleaned up. During the cleanup path
> > we remove the xarray entry for this xe user in our
> > xe device and free up its memory.
> >
> > Signed-off-by: Aakash Deep Sarkar <aakash.deep.sarkar@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_device.c | 21 ++++++++
> > drivers/gpu/drm/xe/xe_device_types.h | 16 ++++++
> > drivers/gpu/drm/xe/xe_user.c | 77 +++++++++++++++++++++++++++-
> > drivers/gpu/drm/xe/xe_user.h | 11 +++-
> > 4 files changed, 123 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> > index 386940323630..5a084fd39876 100644
> > --- a/drivers/gpu/drm/xe/xe_device.c
> > +++ b/drivers/gpu/drm/xe/xe_device.c
> > @@ -65,6 +65,7 @@
> > #include "xe_tile.h"
> > #include "xe_ttm_stolen_mgr.h"
> > #include "xe_ttm_sys_mgr.h"
> > +#include "xe_user.h"
> > #include "xe_vm.h"
> > #include "xe_vm_madvise.h"
> > #include "xe_vram.h"
> > @@ -82,7 +83,9 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
> > struct xe_drm_client *client;
> > struct xe_file *xef;
> > int ret = -ENOMEM;
> > + int uid = -EINVAL;
> > struct task_struct *task = NULL;
> > + const struct cred *cred = NULL;
> >
> > xef = kzalloc(sizeof(*xef), GFP_KERNEL);
> > if (!xef)
> > @@ -107,8 +110,16 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
> > file->driver_priv = xef;
> > kref_init(&xef->refcount);
> >
> > + INIT_LIST_HEAD(&xef->user_link);
> > +
> > task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
> > if (task) {
> > + cred = get_task_cred(task);
> > + if (cred) {
> > + uid = (unsigned int) cred->euid.val;
> > + xe_user_init(xe, xef, uid);
> > + put_cred(cred);
> > + }
> > xef->process_name = kstrdup(task->comm, GFP_KERNEL);
> > xef->pid = task->pid;
> > put_task_struct(task);
> > @@ -128,6 +139,12 @@ static void xe_file_destroy(struct kref *ref)
> >
> > xe_drm_client_put(xef->client);
> > kfree(xef->process_name);
> > +
> > + mutex_lock(&xef->user->filelist_lock);
> > + list_del(&xef->user_link);
> > + mutex_unlock(&xef->user->filelist_lock);
> > +
> > + xe_user_put(xef->user);
> > kfree(xef);
> > }
> >
> > @@ -467,6 +484,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
> >
> > xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
> >
> > + xa_init_flags(&xe->work_period.users, XA_FLAGS_ALLOC1);
> > +
> > + mutex_init(&xe->work_period.lock);
> > +
> > if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
> > /* Trigger a large asid and an early asid wrap. */
> > u32 asid;
> > diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> > index 54a612787289..4d4e9a63b3fd 100644
> > --- a/drivers/gpu/drm/xe/xe_device_types.h
> > +++ b/drivers/gpu/drm/xe/xe_device_types.h
> > @@ -613,6 +613,16 @@ struct xe_device {
> > atomic_t g2g_test_count;
> > #endif
> >
> > + /**
> > + * @xe_work_period: Support for GPU work period tracepoint
> > + */
> > + struct xe_work_period {
> > + /** @users: list of users that have opened this xe device */
> > + struct xarray users;
> > + /** @lock: lock protecting this structure */
> > + struct mutex lock;
> > + } work_period;
> > +
> > /* private: */
> >
> > #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> > @@ -684,6 +694,12 @@ struct xe_file {
> > /** @active_duration_ns: total run time in ns for this xe file */
> > u64 active_duration_ns;
> >
> > + /** @user: pointer to struct xe_user associated with this xe file */
> > + struct xe_user *user;
> > +
> > + /** @user_link: link into xe_user::filelist */
> > + struct list_head user_link;
> > +
> > /** @client: drm client */
> > struct xe_drm_client *client;
> >
> > diff --git a/drivers/gpu/drm/xe/xe_user.c b/drivers/gpu/drm/xe/xe_user.c
> > index f35e18776300..cb3de75aa497 100644
> > --- a/drivers/gpu/drm/xe/xe_user.c
> > +++ b/drivers/gpu/drm/xe/xe_user.c
> > @@ -3,6 +3,8 @@
> > * Copyright © 2025 Intel Corporation
> > */
> >
> > +#include <drm/drm_drv.h>
> > +
> > #include "xe_user.h"
> >
> >
> > @@ -60,7 +62,7 @@
> > *
> > * Return: pointer to user struct or NULL if can't allocate
> > */
> > -struct xe_user *xe_user_alloc(void)
> > +static struct xe_user *xe_user_alloc(void)
> > {
> > struct xe_user *user;
> >
> > @@ -71,6 +73,7 @@ struct xe_user *xe_user_alloc(void)
> > kref_init(&user->refcount);
> > mutex_init(&user->filelist_lock);
> > INIT_LIST_HEAD(&user->filelist);
> > + INIT_WORK(&user->work, work_period_worker);
> > return user;
> > }
> >
> > @@ -84,6 +87,78 @@ void __xe_user_free(struct kref *kref)
> > {
> > struct xe_user *user =
> > container_of(kref, struct xe_user, refcount);
> > + struct xe_device *xe = user->xe;
> > + void *lookup;
> > +
> > + mutex_lock(&xe->work_period.lock);
>
> You don't need to take work_period.lock here. Xarrays have their own
> locking. work_period.lock should protect the reference upon lookup,
> that's it.
>
Actually I misspoke here. You do need this part with how you have it
coded, but something looks very odd with the work_period.lock / xarray /
ref counting / waiting on workers under work_period.lock scheme.
Let me think on this part for a bit.
Matt
> > + lookup = xa_erase(&xe->work_period.users, user->id);
> > + xe_assert(xe, lookup == user);
> > + mutex_unlock(&xe->work_period.lock);
> >
> > + drm_dev_put(&user->xe->drm);
> > kfree(user);
> > }
> > +
> > +static struct xe_user *xe_user_lookup(struct xe_device *xe, u32 uid)
> > +{
> > + struct xe_user *user = NULL;
> > + unsigned long i;
> > +
> > + mutex_lock(&xe->work_period.lock);
>
> guard(mutex)(&xe->work_period.lock) will work better here.
>
> > + xa_for_each(&xe->work_period.users, i, user) {
> > + if (user->uid == uid) {
> > + xe_user_get(user);
> > + mutex_unlock(&xe->work_period.lock);
> > + return user;
> > + }
> > + }
> > + mutex_unlock(&xe->work_period.lock);
> > +
> > + return NULL;
> > +}
> > +
> > +int xe_user_init(struct xe_device *xe, struct xe_file *xef, unsigned int uid)
> > +{
> > + struct xe_user *user = NULL;
> > + int ret;
> > + u32 idx;
> > + /*
> > + * Check if the calling process/uid has already been registered
> > + * with the xe device during a previous open call. If so then
> > + * take a reference to this xe user and add this xe file to the
> > + * filelist belonging to this xe user
> > + */
> > + user = xe_user_lookup(xe, uid);
> > + if (!user) {
> > + /*
> > + * We couldn't find an existing xe user for the calling process.
> > + * Allocate a new struct xe_user and register it with this xe
> > + * device
> > + */
> > + user = xe_user_alloc();
> > + if (!user)
> > + return -ENOMEM;
> > +
> > +
> > + user->uid = uid;
> > + user->last_timestamp_ns = ktime_get_raw_ns();
> > + user->xe = xe;
> > +
> > + mutex_lock(&xe->work_period.lock);
> > + ret = xa_alloc(&xe->work_period.users, &idx, user, xa_limit_32b, GFP_KERNEL);
>
> You don't need to lock here either.
>
> Matt
>
> > + mutex_unlock(&xe->work_period.lock);
> > +
> > + if (ret < 0)
> > + return ret;
> > +
> > + user->id = idx;
> > + drm_dev_get(&xe->drm);
> > + }
> > +
> > + mutex_lock(&user->filelist_lock);
> > + list_add(&xef->user_link, &user->filelist);
> > + mutex_unlock(&user->filelist_lock);
> > + xef->user = user;
> > +
> > + return 0;
> > +}
> > diff --git a/drivers/gpu/drm/xe/xe_user.h b/drivers/gpu/drm/xe/xe_user.h
> > index 9628cc628a37..341200c55509 100644
> > --- a/drivers/gpu/drm/xe/xe_user.h
> > +++ b/drivers/gpu/drm/xe/xe_user.h
> > @@ -6,6 +6,9 @@
> > #ifndef _XE_USER_H_
> > #define _XE_USER_H_
> >
> > +#include "xe_device.h"
> > +
> > +
> > /**
> > * struct xe_user - xe user structure
> > *
> > @@ -40,6 +43,11 @@ struct xe_user {
> > */
> > struct work_struct work;
> >
> > + /**
> > + * @id: index of this user into the xe device::users xarray
> > + */
> > + u32 id;
> > +
> > /**
> > * @uid: UID of this xe_user
> > */
> > @@ -58,7 +66,8 @@ struct xe_user {
> > u64 last_timestamp_ns;
> > };
> >
> > -struct xe_user *xe_user_alloc(void);
> > +int xe_user_init(struct xe_device *xe, struct xe_file *xef, unsigned int uid);
> > +
> >
> > static inline struct xe_user *
> > xe_user_get(struct xe_user *user)
> > --
> > 2.49.0
> >
next prev parent reply other threads:[~2025-10-06 21:01 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-06 14:20 [PATCH v5 0/8] [ANDROID]: Add GPU work period support for Xe driver Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 1/8] drm/xe: Add a new xe_user structure Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 2/8] drm/xe: Add xe_gt_clock_interval_to_ns function Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 3/8] drm/xe: Modify xe_exec_queue_update_run_ticks Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 4/8] drm/xe: Handle xe_user creation and removal Aakash Deep Sarkar
2025-10-06 20:49 ` Matthew Brost
2025-10-06 21:00 ` Matthew Brost [this message]
2025-10-06 14:20 ` [PATCH v5 5/8] drm/xe: Implement xe_work_period_worker Aakash Deep Sarkar
2025-10-06 21:12 ` Matthew Brost
2025-10-06 21:38 ` Matthew Brost
2025-10-06 14:20 ` [PATCH v5 6/8] drm/xe: Add a Kconfig option for GPU work period Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 7/8] drm/xe: Handle xe_work_period destruction Aakash Deep Sarkar
2025-10-06 14:20 ` [PATCH v5 8/8] Hack patch: Do not merge Aakash Deep Sarkar
2025-10-06 15:03 ` ✗ CI.checkpatch: warning for : Add GPU work period support for Xe driver (rev5) Patchwork
2025-10-06 15:04 ` ✓ CI.KUnit: success " Patchwork
2025-10-06 15:58 ` ✗ Xe.CI.BAT: failure " Patchwork
2025-10-06 17:42 ` ✗ Xe.CI.Full: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aOQuAzLb6fcoJq4g@lstrano-desk.jf.intel.com \
--to=matthew.brost@intel.com \
--cc=aakash.deep.sarkar@intel.com \
--cc=ashutosh.dixit@intel.com \
--cc=carlos.santa@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=jani.nikula@intel.com \
--cc=jeevaka.badrappan@intel.com \
--cc=matthew.auld@intel.com \
--cc=rodrigo.vivi@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox