Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
To: dri-devel@lists.freedesktop.org
Cc: amd-gfx@lists.freedesktop.org, kernel-dev@igalia.com,
	intel-xe@lists.freedesktop.org, cgroups@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [RFC 21/21] drm/xe: Register with the DRM scheduling cgroup controller
Date: Thu, 4 Sep 2025 13:08:56 +0100	[thread overview]
Message-ID: <58866bb5-9fee-4709-9350-70b978feaed4@igalia.com> (raw)
In-Reply-To: <20250903152327.66002-22-tvrtko.ursulin@igalia.com>


On 03/09/2025 16:23, Tvrtko Ursulin wrote:
> Wire up the scheduling weight notification into the driver.
> 
> DRM cgroup controller will notify the driver of scheduling weights for
> each DRM client, which the driver will map into the three GuC scheduling
> priorities by giving the lowest weight client the low priority, and
> respectively the highest one high. The other clients will not be changed,
> nor will the ones which have individually specified a priority other
> than normal.
> 
> The priority changes are done from a delayed worker to coalesce
> potentially numerous updates and also to allow taking the mutexes from a
> callback which runs with preemption disabled.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> ---
>   drivers/gpu/drm/xe/xe_device.c       | 18 +++++++
>   drivers/gpu/drm/xe/xe_device_types.h | 15 ++++++
>   drivers/gpu/drm/xe/xe_exec_queue.c   | 80 ++++++++++++++++++++++++++++
>   drivers/gpu/drm/xe/xe_exec_queue.h   |  5 ++
>   drivers/gpu/drm/xe/xe_guc_submit.c   |  8 ++-
>   drivers/gpu/drm/xe/xe_pm.c           |  4 ++
>   6 files changed, 129 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 9e2952c9c06a..9fef10c50868 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -112,6 +112,10 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
>   		put_task_struct(task);
>   	}
>   
> +#ifdef CONFIG_CGROUP_DRM
> +	xef->cg.prio = XE_EXEC_QUEUE_PRIORITY_NORMAL; // TODO: inherit current cgroup priority
> +#endif

For the record from the updated branch this TODO is addressed like this:

#ifdef CONFIG_CGROUP_DRM
	/*
	 * Set the initial values to valid ones but note that both will get set
	 * and updated as the DRM core will soon notify the cgroup controller
	 * that the new client has entered the group via
	 * drmcgroup_client_open(). That in turn will trigger the weight
	 * notifications and then xe_drm_cgroup_notify_weight() will update both
	 * shortly.
	 */
	atomic_set(&xef->cg.weight, CGROUP_WEIGHT_DFL);
	xef->cg.prio = XE_EXEC_QUEUE_PRIORITY_NORMAL;
#endif

Regards,

Tvrtko

> +
>   	return 0;
>   }
>   
> @@ -368,6 +372,12 @@ static const struct file_operations xe_driver_fops = {
>   	.fop_flags = FOP_UNSIGNED_OFFSET,
>   };
>   
> +#ifdef CONFIG_CGROUP_DRM
> +static const struct drm_cgroup_ops xe_drm_cgroup_ops = {
> +	.notify_weight = xe_drm_cgroup_notify_weight,
> +};
> +#endif
> +
>   static struct drm_driver driver = {
>   	/* Don't use MTRRs here; the Xserver or userspace app should
>   	 * deal with them for Intel hardware.
> @@ -386,6 +396,10 @@ static struct drm_driver driver = {
>   #ifdef CONFIG_PROC_FS
>   	.show_fdinfo = xe_drm_client_fdinfo,
>   #endif
> +
> +#ifdef CONFIG_CGROUP_DRM
> +	.cg_ops = &xe_drm_cgroup_ops,
> +#endif
>   	.ioctls = xe_ioctls,
>   	.num_ioctls = ARRAY_SIZE(xe_ioctls),
>   	.fops = &xe_driver_fops,
> @@ -500,6 +514,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
>   	if (err)
>   		goto err;
>   
> +#ifdef CONFIG_CGROUP_DRM
> +	INIT_DELAYED_WORK(&xe->cg.work, xe_drm_cgroup_work);
> +#endif
> +
>   	return xe;
>   
>   err:
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 092004d14db2..dbc65a4aa08d 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -19,6 +19,7 @@
>   #include "xe_oa_types.h"
>   #include "xe_platform_types.h"
>   #include "xe_pmu_types.h"
> +#include "xe_exec_queue_types.h"
>   #include "xe_pt_types.h"
>   #include "xe_sriov_pf_types.h"
>   #include "xe_sriov_types.h"
> @@ -34,6 +35,7 @@
>   struct dram_info;
>   struct intel_display;
>   struct intel_dg_nvm_dev;
> +struct xe_file;
>   struct xe_ggtt;
>   struct xe_i2c;
>   struct xe_pat_ops;
> @@ -624,6 +626,12 @@ struct xe_device {
>   		unsigned int czclk_freq;
>   	};
>   #endif
> +
> +#ifdef CONFIG_CGROUP_DRM
> +	struct {
> +		struct delayed_work	work;
> +	} cg;
> +#endif
>   };
>   
>   /**
> @@ -685,6 +693,13 @@ struct xe_file {
>   
>   	/** @refcount: ref count of this xe file */
>   	struct kref refcount;
> +
> +#ifdef CONFIG_CGROUP_DRM
> +	struct {
> +		atomic_t weight;
> +		enum xe_exec_queue_priority prio;
> +	} cg;
> +#endif
>   };
>   
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 063c89d981e5..2f072d2a0117 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -1139,3 +1139,83 @@ void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q)
>   	}
>   	spin_unlock(&sched->base.job_list_lock);
>   }
> +
> +#ifdef CONFIG_CGROUP_DRM
> +void xe_drm_cgroup_work(struct work_struct *work)
> +{
> +	struct xe_device *xe = container_of(work, typeof(*xe), cg.work.work);
> +	unsigned int weight, min = UINT_MAX, max = 0;
> +	struct drm_device *dev = &xe->drm;
> +	struct drm_file *file;
> +	struct xe_file *xef;
> +
> +	mutex_lock(&dev->filelist_mutex);
> +
> +	list_for_each_entry(file, &dev->filelist, lhead) {
> +		xef = to_xe_file(file);
> +		weight = atomic_read(&xef->cg.weight);
> +
> +		if (!weight)
> +			continue;
> +
> +		if (weight < min)
> +			min = weight;
> +
> +		if (weight > max)
> +			max = weight;
> +	}
> +
> +	list_for_each_entry(file, &dev->filelist, lhead) {
> +		enum xe_exec_queue_priority new_prio;
> +		struct xe_exec_queue *q;
> +		unsigned long i;
> +
> +		xef = to_xe_file(file);
> +		weight = atomic_read(&xef->cg.weight);
> +
> +		if (max == min)
> +			new_prio = XE_EXEC_QUEUE_PRIORITY_NORMAL;
> +		else if (weight == max)
> +			new_prio = XE_EXEC_QUEUE_PRIORITY_HIGH;
> +		else if (weight == min)
> +			new_prio = XE_EXEC_QUEUE_PRIORITY_LOW;
> +		else
> +			new_prio = XE_EXEC_QUEUE_PRIORITY_NORMAL;
> +
> +		if (new_prio == xef->cg.prio)
> +			continue;
> +
> +		mutex_lock(&xef->exec_queue.lock);
> +		xa_for_each(&xef->exec_queue.xa, i, q) {
> +			if (q->sched_props.priority !=
> +			    XE_EXEC_QUEUE_PRIORITY_NORMAL)
> +				continue;
> +
> +			xe_exec_queue_get(q);
> +			mutex_unlock(&xef->exec_queue.lock);
> +
> +			q->ops->set_priority(q, new_prio);
> +
> +			mutex_lock(&xef->exec_queue.lock);
> +			xe_exec_queue_put(q);
> +		}
> +		mutex_unlock(&xef->exec_queue.lock);
> +
> +		xef->cg.prio = new_prio;
> +	}
> +
> +	mutex_unlock(&dev->filelist_mutex);
> +}
> +
> +void xe_drm_cgroup_notify_weight(struct drm_file *file_priv,
> +				 unsigned int weight)
> +{
> +	struct xe_file *xef = to_xe_file(file_priv);
> +	struct xe_device *xe = xef->xe;
> +
> +	atomic_set(&xef->cg.weight, weight);
> +
> +	queue_delayed_work(system_unbound_wq, &xe->cg.work,
> +			   msecs_to_jiffies(100));
> +}
> +#endif
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
> index 15ec852e7f7e..5f6b42c74086 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.h
> @@ -95,4 +95,9 @@ int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch);
>   void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q);
>   
>   struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q);
> +
> +void xe_drm_cgroup_notify_weight(struct drm_file *file_priv,
> +				 unsigned int weight);
> +void xe_drm_cgroup_work(struct work_struct *work);
> +
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 86daf6f4728f..df1252f4cd62 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -427,13 +427,19 @@ static const int xe_exec_queue_prio_to_guc[] = {
>   static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
>   {
>   	struct exec_queue_policy policy;
> -	enum xe_exec_queue_priority prio = q->sched_props.priority;
> +	enum xe_exec_queue_priority prio;
>   	u32 timeslice_us = q->sched_props.timeslice_us;
>   	u32 slpc_exec_queue_freq_req = 0;
>   	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
>   
>   	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
>   
> +	prio = q->sched_props.priority;
> +#ifdef CONFIG_CGROUP_DRM
> +	if (prio == XE_EXEC_QUEUE_PRIORITY_NORMAL && q->xef)
> +		prio = q->xef->cg.prio;
> +#endif
> +
>   	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
>   		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
>   
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index a2e85030b7f4..67291f19213b 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -124,6 +124,10 @@ int xe_pm_suspend(struct xe_device *xe)
>   	drm_dbg(&xe->drm, "Suspending device\n");
>   	trace_xe_pm_suspend(xe, __builtin_return_address(0));
>   
> +#ifdef CONFIG_CGROUP_DRM
> +	cancel_delayed_work_sync(&xe->cg.work);
> +#endif
> +
>   	err = xe_pxp_pm_suspend(xe->pxp);
>   	if (err)
>   		goto err;


  reply	other threads:[~2025-09-04 12:09 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-03 15:23 [RFC v8 00/21] DRM scheduling cgroup controller Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 01/21] drm/sched: Add some scheduling quality unit tests Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 02/21] drm/sched: Add some more " Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 03/21] drm/sched: Implement RR via FIFO Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 04/21] drm/sched: Consolidate entity run queue management Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 05/21] drm/sched: Move run queue related code into a separate file Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 06/21] drm/sched: Free all finished jobs at once Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 07/21] drm/sched: Account entity GPU time Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 08/21] drm/sched: Remove idle entity from tree Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 09/21] drm/sched: Add fair scheduling policy Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 10/21] drm/sched: Break submission patterns with some randomness Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 11/21] drm/sched: Remove FIFO and RR and simplify to a single run queue Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 12/21] drm/sched: Embed run queue singleton into the scheduler Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 13/21] cgroup: Add the DRM cgroup controller Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 14/21] cgroup/drm: Track DRM clients per cgroup Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 15/21] cgroup/drm: Add scheduling weight callback Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 16/21] cgroup/drm: Introduce weight based scheduling control Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 17/21] drm/sched: Add helper for tracking entities per client Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 18/21] drm/sched: Add helper for DRM cgroup controller weight notifications Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 19/21] drm/amdgpu: Register with the DRM scheduling cgroup controller Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 20/21] drm/xe: Allow changing GuC scheduling priority Tvrtko Ursulin
2025-09-03 15:23 ` [RFC 21/21] drm/xe: Register with the DRM scheduling cgroup controller Tvrtko Ursulin
2025-09-04 12:08   ` Tvrtko Ursulin [this message]
2025-09-29 14:07 ` [RFC v8 00/21] " Danilo Krummrich
2025-09-30  9:00   ` Philipp Stanner
2025-09-30  9:28     ` DRM Jobqueue design (was "[RFC v8 00/21] DRM scheduling cgroup controller") Danilo Krummrich
2025-09-30 10:12     ` [RFC v8 00/21] DRM scheduling cgroup controller Boris Brezillon
2025-09-30 10:58       ` Danilo Krummrich
2025-09-30 11:57         ` Boris Brezillon
2025-10-07 14:44           ` Danilo Krummrich
2025-10-07 15:44             ` Boris Brezillon
2025-10-23 11:18   ` Tvrtko Ursulin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=58866bb5-9fee-4709-9350-70b978feaed4@igalia.com \
    --to=tvrtko.ursulin@igalia.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=cgroups@vger.kernel.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=kernel-dev@igalia.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox