Re: [PATCH v2 2/4] sched: track the next-highest priority on each runqueue

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Gregory Haskins <ghaskins@novell.com>
To: mingo@elte.hu
Cc: peterz@infradead.org, rostedt@goodmis.org,
	linux-kernel@vger.kernel.org, linux-rt-users@vger.kernel.org
Subject: Re: [PATCH v2 2/4] sched: track the next-highest priority on each runqueue
Date: Wed, 03 Dec 2008 23:22:23 -0500	[thread overview]
Message-ID: <49375AFF.2070307@novell.com> (raw)
In-Reply-To: <20081203220940.11729.49405.stgit@dev.haskins.net>

[-- Attachment #1: Type: text/plain, Size: 7657 bytes --]

Gregory Haskins wrote:
> We will use this later in the series to reduce the amount of rq-lock
> contention during a pull operation
>
> Signed-off-by: Gregory Haskins <ghaskins@novell.com>
> ---
>
>  kernel/sched.c    |    8 ++++-
>  kernel/sched_rt.c |   81 ++++++++++++++++++++++++++++++++++++++++-------------
>  2 files changed, 67 insertions(+), 22 deletions(-)
>
> diff --git a/kernel/sched.c b/kernel/sched.c
> index 6237b9b..24b11eb 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -463,7 +463,10 @@ struct rt_rq {
>  	struct rt_prio_array active;
>  	unsigned long rt_nr_running;
>  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
> -	int highest_prio; /* highest queued rt task prio */
> +	struct {
> +		int curr; /* highest queued rt task prio */
> +		int next; /* next highest */
> +	} highest_prio;
>  #endif
>  #ifdef CONFIG_SMP
>  	unsigned long rt_nr_migratory;
> @@ -8073,7 +8076,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
>  	__set_bit(MAX_RT_PRIO, array->bitmap);
>  
>  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
> -	rt_rq->highest_prio = MAX_RT_PRIO;
> +	rt_rq->highest_prio.curr = MAX_RT_PRIO;
> +	rt_rq->highest_prio.next = MAX_RT_PRIO;
>  #endif
>  #ifdef CONFIG_SMP
>  	rt_rq->rt_nr_migratory = 0;
> diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
> index fb1d4d7..a4022b6 100644
> --- a/kernel/sched_rt.c
> +++ b/kernel/sched_rt.c
> @@ -108,7 +108,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
>  	if (rt_rq->rt_nr_running) {
>  		if (rt_se && !on_rt_rq(rt_se))
>  			enqueue_rt_entity(rt_se);
> -		if (rt_rq->highest_prio < curr->prio)
> +		if (rt_rq->highest_prio.curr < curr->prio)
>  			resched_task(curr);
>  	}
>  }
> @@ -473,7 +473,7 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
>  	struct rt_rq *rt_rq = group_rt_rq(rt_se);
>  
>  	if (rt_rq)
> -		return rt_rq->highest_prio;
> +		return rt_rq->highest_prio.curr;
>  #endif
>  
>  	return rt_task_of(rt_se)->prio;
> @@ -547,6 +547,21 @@ static void update_curr_rt(struct rq *rq)
>  	}
>  }
>  
> +#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
> +
> +static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
> +
> +static inline int next_prio(struct rq *rq)
> +{
> +	struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
> +
> +	if (next && rt_prio(next->prio))
> +		return next->prio;
> +	else
> +		return MAX_RT_PRIO;
> +}
> +#endif
> +
>  static inline
>  void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  {
> @@ -560,14 +575,32 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  	WARN_ON(!rt_prio(prio));
>  	rt_rq->rt_nr_running++;
>  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
> -	if (prio < rt_rq->highest_prio) {
> +	if (prio < rt_rq->highest_prio.curr) {
>  
> -		rt_rq->highest_prio = prio;
> +		/*
> +		 * If the new task is higher in priority than anything on the
> +		 * run-queue, we have a new high that must be published to
> +		 * the world.  We also know that the previous high becomes
> +		 * our next-highest.
> +		 */
> +		rt_rq->highest_prio.next = rt_rq->highest_prio.curr;
> +		rt_rq->highest_prio.curr = prio;
>  #ifdef CONFIG_SMP
>  		if (rq->online)
>  			cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
>  #endif
> -	}
> +	} else if (prio == rt_rq->highest_prio.curr)
> +		/*
> +		 * If the next task is equal in priority to the highest on
> +		 * the run-queue, then we implicitly know that the next highest
> +		 * task cannot be any lower than current
> +		 */
> +		rt_rq->highest_prio.next = prio;
> +	else if (prio < rt_rq->highest_prio.next)
> +		/*
> +		 * Otherwise, we need to recompute next-highest
> +		 */
> +		rt_rq->highest_prio.next = next_prio(rq);
>  #endif
>  #ifdef CONFIG_SMP
>  	if (rt_se->nr_cpus_allowed > 1)
> @@ -591,7 +624,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  {
>  #ifdef CONFIG_SMP
>  	struct rq *rq = rq_of_rt_rq(rt_rq);
> -	int highest_prio = rt_rq->highest_prio;
> +	int highest_prio = rt_rq->highest_prio.curr;
>  #endif
>  
>  	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
> @@ -599,24 +632,32 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  	rt_rq->rt_nr_running--;
>  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
>  	if (rt_rq->rt_nr_running) {
> -		struct rt_prio_array *array;
> +		int prio = rt_se_prio(rt_se);
> +
> +		WARN_ON(prio < rt_rq->highest_prio.curr);
>  
> -		WARN_ON(rt_se_prio(rt_se) < rt_rq->highest_prio);
> -		if (rt_se_prio(rt_se) == rt_rq->highest_prio) {
> -			/* recalculate */
> -			array = &rt_rq->active;
> -			rt_rq->highest_prio =
> +		/*
> +		 * This may have been our highest or next-highest priority
> +		 * task and therefore we may have some recomputation to do
> +		 */
> +		if (prio == rt_rq->highest_prio.curr) {
> +			struct rt_prio_array *array = &rt_rq->active;
> +
> +			rt_rq->highest_prio.curr =
>  				sched_find_first_bit(array->bitmap);
> -		} /* otherwise leave rq->highest prio alone */
> +		}
> +
> +		if (prio == rt_rq->highest_prio.next)
>   

Crap.  Trying to fall asleep tonight, I realized this is a bug I think. 
Looks like I will need a v3

It should be "prio <= rt_rq->highest_prio.next" or we can miss updating
.next properly.

> +			rt_rq->highest_prio.next = next_prio(rq);
>  	} else
> -		rt_rq->highest_prio = MAX_RT_PRIO;
> +		rt_rq->highest_prio.curr = MAX_RT_PRIO;
>  #endif
>  #ifdef CONFIG_SMP
>  	if (rt_se->nr_cpus_allowed > 1)
>  		rq->rt.rt_nr_migratory--;
>  
> -	if (rq->online && rt_rq->highest_prio != highest_prio)
> -		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio);
> +	if (rq->online && rt_rq->highest_prio.curr != highest_prio)
> +		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
>  
>  	update_rt_migration(rq);
>  #endif /* CONFIG_SMP */
> @@ -1066,7 +1107,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
>  		}
>  
>  		/* If this rq is still suitable use it. */
> -		if (lowest_rq->rt.highest_prio > task->prio)
> +		if (lowest_rq->rt.highest_prio.curr > task->prio)
>  			break;
>  
>  		/* try again */
> @@ -1254,7 +1295,7 @@ static int pull_rt_task(struct rq *this_rq)
>  static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
>  {
>  	/* Try to pull RT tasks here if we lower this rq's prio */
> -	if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio)
> +	if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
>  		pull_rt_task(rq);
>  }
>  
> @@ -1340,7 +1381,7 @@ static void rq_online_rt(struct rq *rq)
>  
>  	__enable_runtime(rq);
>  
> -	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
> +	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
>  }
>  
>  /* Assumes rq->lock is held */
> @@ -1431,7 +1472,7 @@ static void prio_changed_rt(struct rq *rq, struct task_struct *p,
>  		 * can release the rq lock and p could migrate.
>  		 * Only reschedule if p is still on the same runqueue.
>  		 */
> -		if (p->prio > rq->rt.highest_prio && rq->curr == p)
> +		if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
>  			resched_task(p);
>  #else
>  		/* For UP simply resched on drop of prio */
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>   



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

next prev parent reply	other threads:[~2008-12-04  4:18 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-11-11 14:26 [RFC PATCH 0/3] Series short description Gregory Haskins
2008-11-11 14:26 ` [RFC PATCH 1/3] sched: track the next-highest priority on each runqueue Gregory Haskins
2008-11-11 14:26 ` [RFC PATCH 2/3] sched: use highest_prio.curr for pull threshold Gregory Haskins
2008-11-11 14:26 ` [RFC PATCH 3/3] sched: use highest_prio.next to optimize pull operations Gregory Haskins
2008-11-11 17:56 ` [RFC PATCH 0/3] Series short description Ingo Molnar
2008-11-11 18:51   ` Gregory Haskins
2008-11-11 19:31 ` Chris Friesen
2008-11-11 19:50   ` Gregory Haskins
2008-12-03 20:01 ` Gregory Haskins
2008-12-03 20:30   ` Randy Dunlap
2008-12-03 20:39     ` [RFC PATCH 0/3] sched: track next-highest priority (was "Series short discription") Gregory Haskins
2008-12-03 20:40   ` [RFC PATCH 0/3] Series short description Ingo Molnar
2008-12-03 22:09     ` [PATCH v2 0/4] sched: track next-highest priority Gregory Haskins
2008-12-03 22:09       ` [PATCH v2 1/4] sched: cleanup inc/dec_rt_tasks Gregory Haskins
2008-12-03 22:09       ` [PATCH v2 2/4] sched: track the next-highest priority on each runqueue Gregory Haskins
2008-12-04  4:22         ` Gregory Haskins [this message]
2008-12-03 22:09       ` [PATCH v2 3/4] sched: use highest_prio.curr for pull threshold Gregory Haskins
2008-12-03 22:09       ` [PATCH v2 4/4] sched: use highest_prio.next to optimize pull operations Gregory Haskins
2008-12-04 15:43       ` [PATCH v3 0/4] sched: track next-highest priority Gregory Haskins
2008-12-04 15:43         ` [PATCH v3 1/4] sched: cleanup inc/dec_rt_tasks Gregory Haskins
2008-12-04 15:43           ` Gregory Haskins
2008-12-04 15:43         ` [PATCH v3 2/4] sched: track the next-highest priority on each runqueue Gregory Haskins
2008-12-04 15:43         ` [PATCH v3 3/4] sched: use highest_prio.curr for pull threshold Gregory Haskins
2008-12-04 15:43           ` Gregory Haskins
2008-12-04 15:43         ` [PATCH v3 4/4] sched: use highest_prio.next to optimize pull operations Gregory Haskins

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=49375AFF.2070307@novell.com \
    --to=ghaskins@novell.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.