All of lore.kernel.org
 help / color / mirror / Atom feed
From: Danilo Krummrich <dakr@redhat.com>
To: Tvrtko Ursulin <tursulin@igalia.com>
Cc: dri-devel@lists.freedesktop.org, kernel-dev@igalia.com,
	"Tvrtko Ursulin" <tvrtko.ursulin@igalia.com>,
	"Christian König" <christian.koenig@amd.com>,
	"Matthew Brost" <matthew.brost@intel.com>,
	"Philipp Stanner" <pstanner@redhat.com>
Subject: Re: [RFC 03/14] drm/sched: Implement RR via FIFO
Date: Wed, 8 Jan 2025 10:42:04 +0100	[thread overview]
Message-ID: <Z35IbF2QPZ3NAirM@pollux> (raw)
In-Reply-To: <20241230165259.95855-4-tursulin@igalia.com>

On Mon, Dec 30, 2024 at 04:52:48PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> 
> Round-robin being the non-default policy and unclear how much it is used,

I don't know either, but would be nice if we could instead remove it.

> we can notice that it can be implemented using the FIFO data structures if
> we only invent a fake submit timestamp which is monotonically increasing
> inside drm_sched_rq instances.
> 
> So instead of remembering which was the last entity the scheduler worker
> picked, we can bump the picked one to the bottom of the tree, achieving
> the same round-robin behaviour.
> 
> Advantage is that we can consolidate to a single code path and remove a
> bunch of code. Downside is round-robin mode now needs to lock on the job
> pop path but that should not be visible.

I guess you did you test both scheduler strategies with this change?

> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Danilo Krummrich <dakr@redhat.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Philipp Stanner <pstanner@redhat.com>
> ---
>  drivers/gpu/drm/scheduler/sched_entity.c | 53 ++++++++-----
>  drivers/gpu/drm/scheduler/sched_main.c   | 99 +++---------------------
>  include/drm/gpu_scheduler.h              |  5 +-
>  3 files changed, 48 insertions(+), 109 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 8e910586979e..cb5f596b48b7 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -473,9 +473,20 @@ drm_sched_job_dependency(struct drm_sched_job *job,
>  	return NULL;
>  }
>  
> +static ktime_t
> +drm_sched_rq_get_rr_deadline(struct drm_sched_rq *rq)
> +{
> +	lockdep_assert_held(&rq->lock);
> +
> +	rq->rr_deadline = ktime_add_ns(rq->rr_deadline, 1);
> +
> +	return rq->rr_deadline;
> +}
> +
>  struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>  {
> -	struct drm_sched_job *sched_job;
> +	struct drm_sched_job *sched_job, *next_job;
> +	struct drm_sched_rq *rq;
>  
>  	sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
>  	if (!sched_job)
> @@ -510,24 +521,28 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>  	 * Update the entity's location in the min heap according to
>  	 * the timestamp of the next job, if any.
>  	 */
> -	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) {
> -		struct drm_sched_job *next;
> -		struct drm_sched_rq *rq;
> +	next_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
>  
> -		spin_lock(&entity->lock);
> -		rq = entity->rq;
> -		spin_lock(&rq->lock);
> -		next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
> -		if (next) {
> -			drm_sched_rq_update_fifo_locked(entity, rq,
> -							next->submit_ts);
> -		} else {
> -			drm_sched_rq_remove_fifo_locked(entity, rq);
> -		}
> -		spin_unlock(&rq->lock);
> -		spin_unlock(&entity->lock);
> +	spin_lock(&entity->lock);
> +	rq = entity->rq;
> +	spin_lock(&rq->lock);
> +
> +	if (next_job) {
> +		ktime_t ts;
> +
> +		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> +			ts = next_job->submit_ts;
> +		else
> +			ts = drm_sched_rq_get_rr_deadline(rq);
> +
> +		drm_sched_rq_update_fifo_locked(entity, rq, ts);
> +	} else {
> +		drm_sched_rq_remove_fifo_locked(entity, rq);
>  	}
>  
> +	spin_unlock(&rq->lock);
> +	spin_unlock(&entity->lock);
> +
>  	/* Jobs and entities might have different lifecycles. Since we're
>  	 * removing the job from the entities queue, set the jobs entity pointer
>  	 * to NULL to prevent any future access of the entity through this job.
> @@ -624,9 +639,9 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
>  
>  		spin_lock(&rq->lock);
>  		drm_sched_rq_add_entity(rq, entity);
> -
> -		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> -			drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
> +		if (drm_sched_policy == DRM_SCHED_POLICY_RR)
> +			submit_ts = drm_sched_rq_get_rr_deadline(rq);
> +		drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
>  
>  		spin_unlock(&rq->lock);
>  		spin_unlock(&entity->lock);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 9beb4c611988..eb22b1b7de36 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -189,7 +189,6 @@ static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
>  	spin_lock_init(&rq->lock);
>  	INIT_LIST_HEAD(&rq->entities);
>  	rq->rb_tree_root = RB_ROOT_CACHED;
> -	rq->current_entity = NULL;
>  	rq->sched = sched;
>  }
>  
> @@ -235,82 +234,13 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>  	atomic_dec(rq->sched->score);
>  	list_del_init(&entity->list);
>  
> -	if (rq->current_entity == entity)
> -		rq->current_entity = NULL;
> -
> -	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> -		drm_sched_rq_remove_fifo_locked(entity, rq);
> +	drm_sched_rq_remove_fifo_locked(entity, rq);
>  
>  	spin_unlock(&rq->lock);
>  }
>  
>  /**
> - * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
> - *
> - * @sched: the gpu scheduler
> - * @rq: scheduler run queue to check.
> - *
> - * Try to find the next ready entity.
> - *
> - * Return an entity if one is found; return an error-pointer (!NULL) if an
> - * entity was ready, but the scheduler had insufficient credits to accommodate
> - * its job; return NULL, if no ready entity was found.
> - */
> -static struct drm_sched_entity *
> -drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched,
> -			      struct drm_sched_rq *rq)
> -{
> -	struct drm_sched_entity *entity;
> -
> -	spin_lock(&rq->lock);
> -
> -	entity = rq->current_entity;
> -	if (entity) {
> -		list_for_each_entry_continue(entity, &rq->entities, list) {
> -			if (drm_sched_entity_is_ready(entity)) {
> -				/* If we can't queue yet, preserve the current
> -				 * entity in terms of fairness.
> -				 */
> -				if (!drm_sched_can_queue(sched, entity)) {
> -					spin_unlock(&rq->lock);
> -					return ERR_PTR(-ENOSPC);
> -				}
> -
> -				rq->current_entity = entity;
> -				reinit_completion(&entity->entity_idle);
> -				spin_unlock(&rq->lock);
> -				return entity;
> -			}
> -		}
> -	}
> -
> -	list_for_each_entry(entity, &rq->entities, list) {
> -		if (drm_sched_entity_is_ready(entity)) {
> -			/* If we can't queue yet, preserve the current entity in
> -			 * terms of fairness.
> -			 */
> -			if (!drm_sched_can_queue(sched, entity)) {
> -				spin_unlock(&rq->lock);
> -				return ERR_PTR(-ENOSPC);
> -			}
> -
> -			rq->current_entity = entity;
> -			reinit_completion(&entity->entity_idle);
> -			spin_unlock(&rq->lock);
> -			return entity;
> -		}
> -
> -		if (entity == rq->current_entity)
> -			break;
> -	}
> -
> -	spin_unlock(&rq->lock);
> -
> -	return NULL;
> -}
> -
> -/**
> - * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
> + * drm_sched_rq_select_entity - Select an entity which provides a job to run
>   *
>   * @sched: the gpu scheduler
>   * @rq: scheduler run queue to check.
> @@ -322,32 +252,29 @@ drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched,
>   * its job; return NULL, if no ready entity was found.
>   */
>  static struct drm_sched_entity *
> -drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched,
> -				struct drm_sched_rq *rq)
> +drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched,
> +			   struct drm_sched_rq *rq)
>  {
> +	struct drm_sched_entity *entity = NULL;
>  	struct rb_node *rb;
>  
>  	spin_lock(&rq->lock);
>  	for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) {
> -		struct drm_sched_entity *entity;
> -
>  		entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
>  		if (drm_sched_entity_is_ready(entity)) {
> -			/* If we can't queue yet, preserve the current entity in
> -			 * terms of fairness.
> -			 */
>  			if (!drm_sched_can_queue(sched, entity)) {
> -				spin_unlock(&rq->lock);
> -				return ERR_PTR(-ENOSPC);
> +				entity = ERR_PTR(-ENOSPC);
> +				break;
>  			}
>  
>  			reinit_completion(&entity->entity_idle);
>  			break;
>  		}
> +		entity = NULL;
>  	}
>  	spin_unlock(&rq->lock);
>  
> -	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
> +	return entity;
>  }

The whole diff of drm_sched_rq_select_entity_fifo() (except for the name) has
nothing to do with the patch, does it?

If you want refactor things, please do it in a separate patch.

>  
>  /**
> @@ -1045,20 +972,18 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
>  static struct drm_sched_entity *
>  drm_sched_select_entity(struct drm_gpu_scheduler *sched)
>  {
> -	struct drm_sched_entity *entity;
> +	struct drm_sched_entity *entity = NULL;
>  	int i;
>  
>  	/* Start with the highest priority.
>  	 */
>  	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
> -		entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
> -			drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) :
> -			drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]);
> +		entity = drm_sched_rq_select_entity(sched, sched->sched_rq[i]);
>  		if (entity)
>  			break;
>  	}
>  
> -	return IS_ERR(entity) ? NULL : entity;
> +	return IS_ERR(entity) ? NULL : entity;;
>  }
>  
>  /**
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 978ca621cc13..db65600732b9 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -245,8 +245,7 @@ struct drm_sched_entity {
>   * struct drm_sched_rq - queue of entities to be scheduled.
>   *
>   * @sched: the scheduler to which this rq belongs to.
> - * @lock: protects @entities, @rb_tree_root and @current_entity.
> - * @current_entity: the entity which is to be scheduled.
> + * @lock: protects @entities, @rb_tree_root and @rr_deadline.
>   * @entities: list of the entities to be scheduled.
>   * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling
>   *
> @@ -259,7 +258,7 @@ struct drm_sched_rq {
>  
>  	spinlock_t			lock;
>  	/* Following members are protected by the @lock: */
> -	struct drm_sched_entity		*current_entity;
> +	ktime_t				rr_deadline;
>  	struct list_head		entities;
>  	struct rb_root_cached		rb_tree_root;
>  };
> -- 
> 2.47.1
> 


  reply	other threads:[~2025-01-08  9:42 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-30 16:52 [RFC 00/14] Deadline scheduler and other ideas Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 01/14] drm/sched: Delete unused update_job_credits Tvrtko Ursulin
2025-01-08  8:34   ` Danilo Krummrich
2025-01-08 12:27     ` Boris Brezillon
2025-01-08 14:08       ` Matt Coster
2024-12-30 16:52 ` [RFC 02/14] drm/sched: Remove idle entity from tree Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 03/14] drm/sched: Implement RR via FIFO Tvrtko Ursulin
2025-01-08  9:42   ` Danilo Krummrich [this message]
2025-01-08 15:04     ` Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 04/14] drm/sched: Consolidate entity run queue management Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 05/14] drm/sched: Move run queue related code into a separate file Tvrtko Ursulin
2024-12-31  0:35   ` kernel test robot
2024-12-30 16:52 ` [RFC 06/14] drm/sched: Ignore own fence earlier Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 07/14] drm/sched: Resolve same scheduler dependencies earlier Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 08/14] drm/sched: Add deadline policy Tvrtko Ursulin
2025-01-02 13:11   ` Philipp Stanner
2025-01-03 12:40     ` Tvrtko Ursulin
2025-01-03 12:59       ` Philipp Stanner
2025-01-03 15:11         ` Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 09/14] drm/sched: Remove FIFO and RR and simplify to a single run queue Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 10/14] drm/sched: Queue all free credits in one worker invocation Tvrtko Ursulin
2025-01-07 11:08   ` Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 11/14] drm/sched: Connect with dma-fence deadlines Tvrtko Ursulin
2025-01-07 11:10   ` Tvrtko Ursulin
2025-01-09 11:38   ` Michel Dänzer
2025-01-09 13:31     ` Tvrtko Ursulin
2025-01-09 14:44       ` Michel Dänzer
2025-01-09 16:44         ` Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 12/14] drm/sched: Embed run queue singleton into the scheduler Tvrtko Ursulin
2024-12-31  2:39   ` kernel test robot
2024-12-30 16:52 ` [RFC 13/14] dma-fence: Add helper for custom fence context when merging fences Tvrtko Ursulin
2024-12-30 16:52 ` [RFC 14/14] drm/sched: Resolve all job dependencies in one go Tvrtko Ursulin
2024-12-31  6:01   ` kernel test robot
2025-01-02 13:09 ` [RFC 00/14] Deadline scheduler and other ideas Philipp Stanner
2025-01-03 12:02   ` Tvrtko Ursulin
2025-01-03 12:31     ` Christian König
2025-01-03 13:45       ` Philipp Stanner
2025-01-03 15:17       ` Tvrtko Ursulin
2025-01-09 15:08       ` Michel Dänzer
2025-01-09 16:55         ` Tvrtko Ursulin
2025-01-10  9:14           ` Michel Dänzer
2025-01-13 11:40             ` Tvrtko Ursulin
2025-01-13 15:29               ` Michel Dänzer
2025-01-15 13:38                 ` Tvrtko Ursulin
2025-01-15 14:46                   ` Michel Dänzer
2025-01-03 15:16 ` AW: " Koenig, Christian
2025-01-03 15:32   ` Tvrtko Ursulin
2025-01-06 13:47   ` Simona Vetter
2025-01-08  8:07     ` Philipp Stanner
2025-01-08 17:59       ` Simona Vetter
2025-01-08 18:06         ` Tvrtko Ursulin
2025-01-08  8:31 ` Danilo Krummrich
2025-01-08 15:13   ` Tvrtko Ursulin
2025-01-08 16:57     ` Danilo Krummrich
2025-01-08 18:55       ` Tvrtko Ursulin
2025-01-09 19:59         ` Matthew Brost
2025-01-10  9:16           ` Tvrtko Ursulin
2025-01-10 17:28             ` Matthew Brost
2025-01-13 12:59               ` Tvrtko Ursulin
2025-01-09 19:50       ` Matthew Brost
2025-01-17 12:12 ` Philipp Stanner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z35IbF2QPZ3NAirM@pollux \
    --to=dakr@redhat.com \
    --cc=christian.koenig@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=kernel-dev@igalia.com \
    --cc=matthew.brost@intel.com \
    --cc=pstanner@redhat.com \
    --cc=tursulin@igalia.com \
    --cc=tvrtko.ursulin@igalia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.