From: Philipp Stanner <phasta@mailbox.org>
To: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>,
amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: kernel-dev@igalia.com,
"Christian König" <christian.koenig@amd.com>,
"Danilo Krummrich" <dakr@kernel.org>,
"Matthew Brost" <matthew.brost@intel.com>,
"Philipp Stanner" <phasta@kernel.org>
Subject: Re: [PATCH 05/28] drm/sched: Consolidate entity run queue management
Date: Fri, 10 Oct 2025 12:49:34 +0200
Message-ID: <762e3469df06787205af88e068d72f60dfaebda4.camel@mailbox.org>
In-Reply-To: <20251008085359.52404-6-tvrtko.ursulin@igalia.com>
On Wed, 2025-10-08 at 09:53 +0100, Tvrtko Ursulin wrote:
> Move the code dealing with entities entering and exiting run queues to
> helpers to logically separate it from jobs entering and exiting entities.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Danilo Krummrich <dakr@kernel.org>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Philipp Stanner <phasta@kernel.org>
> ---
> drivers/gpu/drm/scheduler/sched_entity.c | 64 ++-------------
> drivers/gpu/drm/scheduler/sched_internal.h | 8 +-
> drivers/gpu/drm/scheduler/sched_main.c | 95 +++++++++++++++++++---
> 3 files changed, 91 insertions(+), 76 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 4852006f2308..7a0a52ba87bf 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -456,24 +456,9 @@ drm_sched_job_dependency(struct drm_sched_job *job,
> return NULL;
> }
>
> -static ktime_t
> -drm_sched_rq_get_rr_ts(struct drm_sched_rq *rq, struct drm_sched_entity *entity)
> -{
> - ktime_t ts;
> -
> - lockdep_assert_held(&entity->lock);
> - lockdep_assert_held(&rq->lock);
> -
> - ts = ktime_add_ns(rq->rr_ts, 1);
> - entity->rr_ts = ts;
> - rq->rr_ts = ts;
> -
> - return ts;
> -}
> -
> struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
> {
> - struct drm_sched_job *sched_job, *next_job;
> + struct drm_sched_job *sched_job;
`next_job` was only added in a previous patch of this series. Have you
checked whether the patch order could be reversed, so it doesn't get
added here just to be removed again?
Just asking; I don't want to cause unnecessary work here.
>
> sched_job = drm_sched_entity_queue_peek(entity);
> if (!sched_job)
> @@ -502,26 +487,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>
> spsc_queue_pop(&entity->job_queue);
>
> - /*
> - * Update the entity's location in the min heap according to
> - * the timestamp of the next job, if any.
> - */
> - next_job = drm_sched_entity_queue_peek(entity);
> - if (next_job) {
> - struct drm_sched_rq *rq;
> - ktime_t ts;
> -
> - spin_lock(&entity->lock);
> - rq = entity->rq;
> - spin_lock(&rq->lock);
> - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> - ts = next_job->submit_ts;
> - else
> - ts = drm_sched_rq_get_rr_ts(rq, entity);
> - drm_sched_rq_update_fifo_locked(entity, rq, ts);
> - spin_unlock(&rq->lock);
> - spin_unlock(&entity->lock);
> - }
> + drm_sched_rq_pop_entity(entity);
>
> /* Jobs and entities might have different lifecycles. Since we're
> * removing the job from the entities queue, set the jobs entity pointer
> @@ -611,30 +577,10 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
> /* first job wakes up scheduler */
> if (first) {
> struct drm_gpu_scheduler *sched;
> - struct drm_sched_rq *rq;
>
> - /* Add the entity to the run queue */
> - spin_lock(&entity->lock);
> - if (entity->stopped) {
> - spin_unlock(&entity->lock);
> -
> - DRM_ERROR("Trying to push to a killed entity\n");
> - return;
> - }
> -
> - rq = entity->rq;
> - sched = rq->sched;
> -
> - spin_lock(&rq->lock);
> - drm_sched_rq_add_entity(rq, entity);
> - if (drm_sched_policy == DRM_SCHED_POLICY_RR)
> - submit_ts = entity->rr_ts;
> - drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
> -
> - spin_unlock(&rq->lock);
> - spin_unlock(&entity->lock);
> -
> - drm_sched_wakeup(sched);
> + sched = drm_sched_rq_add_entity(entity, submit_ts);
> + if (sched)
> + drm_sched_wakeup(sched);
> }
> }
> EXPORT_SYMBOL(drm_sched_entity_push_job);
> diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h
> index 7ea5a6736f98..8269c5392a82 100644
> --- a/drivers/gpu/drm/scheduler/sched_internal.h
> +++ b/drivers/gpu/drm/scheduler/sched_internal.h
> @@ -12,13 +12,11 @@ extern int drm_sched_policy;
>
> void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
>
> -void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
> - struct drm_sched_entity *entity);
> +struct drm_gpu_scheduler *
> +drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts);
> void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
> struct drm_sched_entity *entity);
> -
> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
> - struct drm_sched_rq *rq, ktime_t ts);
> +void drm_sched_rq_pop_entity(struct drm_sched_entity *entity);
>
> void drm_sched_entity_select_rq(struct drm_sched_entity *entity);
> struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 8e62541b439a..e5d02c28665c 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -151,9 +151,9 @@ static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity,
> }
> }
>
> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
> - struct drm_sched_rq *rq,
> - ktime_t ts)
> +static void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
> + struct drm_sched_rq *rq,
> + ktime_t ts)
> {
> /*
> * Both locks need to be grabbed, one to protect from entity->rq change
> @@ -191,22 +191,45 @@ static void drm_sched_rq_init(struct drm_sched_rq *rq,
> /**
> * drm_sched_rq_add_entity - add an entity
> *
> - * @rq: scheduler run queue
> * @entity: scheduler entity
> + * @ts: submission timestamp
> *
> * Adds a scheduler entity to the run queue.
> + *
> + * Returns a DRM scheduler pre-selected to handle this entity.
> */
> -void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
> - struct drm_sched_entity *entity)
> +struct drm_gpu_scheduler *
> +drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts)
> {
I'm not sure it's a good idea to have the scheduler returned from that
function; semantically that doesn't make a whole lot of sense for an
"add entity" helper.
At the very least the function's docstring, and maybe even its name,
should be adjusted to explain why this makes sense. The commit message,
too. It's not trivially understood.
I think I get why it's being done, but writing it down in black and
white gives us something to grasp.
Something like "adds an entity to a run queue, selects the appropriate
scheduler and returns it for the purpose of XYZ".
> - lockdep_assert_held(&entity->lock);
> - lockdep_assert_held(&rq->lock);
> + struct drm_gpu_scheduler *sched;
> + struct drm_sched_rq *rq;
>
> - if (!list_empty(&entity->list))
> - return;
> + /* Add the entity to the run queue */
> + spin_lock(&entity->lock);
> + if (entity->stopped) {
> + spin_unlock(&entity->lock);
>
> - atomic_inc(rq->sched->score);
> - list_add_tail(&entity->list, &rq->entities);
> + DRM_ERROR("Trying to push to a killed entity\n");
> + return NULL;
> + }
> +
> + rq = entity->rq;
> + spin_lock(&rq->lock);
> + sched = rq->sched;
> +
> + if (list_empty(&entity->list)) {
> + atomic_inc(sched->score);
> + list_add_tail(&entity->list, &rq->entities);
> + }
> +
> + if (drm_sched_policy == DRM_SCHED_POLICY_RR)
> + ts = entity->rr_ts;
> + drm_sched_rq_update_fifo_locked(entity, rq, ts);
> +
> + spin_unlock(&rq->lock);
> + spin_unlock(&entity->lock);
> +
> + return sched;
> }
>
> /**
> @@ -235,6 +258,54 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
> spin_unlock(&rq->lock);
> }
>
> +static ktime_t
> +drm_sched_rq_get_rr_ts(struct drm_sched_rq *rq, struct drm_sched_entity *entity)
> +{
> + ktime_t ts;
> +
> + lockdep_assert_held(&entity->lock);
> + lockdep_assert_held(&rq->lock);
> +
> + ts = ktime_add_ns(rq->rr_ts, 1);
> + entity->rr_ts = ts;
> + rq->rr_ts = ts;
I mentioned that pattern in a previous patch already: "get_rr_ts"
doesn't seem like an obvious name, since you're actually setting data
(both rq->rr_ts and entity->rr_ts) here, not just reading it.
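Just as a naming sketch (the name is only a suggestion, untested; the
body is unchanged from your patch):

static ktime_t
drm_sched_rq_advance_rr_ts(struct drm_sched_rq *rq,
			   struct drm_sched_entity *entity)
{
	ktime_t ts;

	lockdep_assert_held(&entity->lock);
	lockdep_assert_held(&rq->lock);

	/* Bump the RR timestamp of both the run queue and the entity. */
	ts = ktime_add_ns(rq->rr_ts, 1);
	entity->rr_ts = ts;
	rq->rr_ts = ts;

	return ts;
}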
P.
> +
> + return ts;
> +}
> +
> +/**
> + * drm_sched_rq_pop_entity - pops an entity
> + *
> + * @entity: scheduler entity
> + *
> + * To be called every time after a job is popped from the entity.
> + */
> +void drm_sched_rq_pop_entity(struct drm_sched_entity *entity)
> +{
> + struct drm_sched_job *next_job;
> + struct drm_sched_rq *rq;
> + ktime_t ts;
> +
> + /*
> + * Update the entity's location in the min heap according to
> + * the timestamp of the next job, if any.
> + */
> + next_job = drm_sched_entity_queue_peek(entity);
> + if (!next_job)
> + return;
> +
> + spin_lock(&entity->lock);
> + rq = entity->rq;
> + spin_lock(&rq->lock);
> + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> + ts = next_job->submit_ts;
> + else
> + ts = drm_sched_rq_get_rr_ts(rq, entity);
> + drm_sched_rq_update_fifo_locked(entity, rq, ts);
> + spin_unlock(&rq->lock);
> + spin_unlock(&entity->lock);
> +}
> +
> /**
> * drm_sched_rq_select_entity - Select an entity which provides a job to run
> *