linux-rt-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Gregory Haskins <ghaskins@novell.com>
To: mingo@elte.hu
Cc: peterz@infradead.org, rostedt@goodmis.org,
	linux-kernel@vger.kernel.org, linux-rt-users@vger.kernel.org
Subject: Re: [PATCH v2 2/4] sched: track the next-highest priority on each runqueue
Date: Wed, 03 Dec 2008 23:22:23 -0500	[thread overview]
Message-ID: <49375AFF.2070307@novell.com> (raw)
In-Reply-To: <20081203220940.11729.49405.stgit@dev.haskins.net>

[-- Attachment #1: Type: text/plain, Size: 7657 bytes --]

Gregory Haskins wrote:
> We will use this later in the series to reduce the amount of rq-lock
> contention during a pull operation
>
> Signed-off-by: Gregory Haskins <ghaskins@novell.com>
> ---
>
>  kernel/sched.c    |    8 ++++-
>  kernel/sched_rt.c |   81 ++++++++++++++++++++++++++++++++++++++++-------------
>  2 files changed, 67 insertions(+), 22 deletions(-)
>
> diff --git a/kernel/sched.c b/kernel/sched.c
> index 6237b9b..24b11eb 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -463,7 +463,10 @@ struct rt_rq {
>  	struct rt_prio_array active;
>  	unsigned long rt_nr_running;
>  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
> -	int highest_prio; /* highest queued rt task prio */
> +	struct {
> +		int curr; /* highest queued rt task prio */
> +		int next; /* next highest */
> +	} highest_prio;
>  #endif
>  #ifdef CONFIG_SMP
>  	unsigned long rt_nr_migratory;
> @@ -8073,7 +8076,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
>  	__set_bit(MAX_RT_PRIO, array->bitmap);
>  
>  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
> -	rt_rq->highest_prio = MAX_RT_PRIO;
> +	rt_rq->highest_prio.curr = MAX_RT_PRIO;
> +	rt_rq->highest_prio.next = MAX_RT_PRIO;
>  #endif
>  #ifdef CONFIG_SMP
>  	rt_rq->rt_nr_migratory = 0;
> diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
> index fb1d4d7..a4022b6 100644
> --- a/kernel/sched_rt.c
> +++ b/kernel/sched_rt.c
> @@ -108,7 +108,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
>  	if (rt_rq->rt_nr_running) {
>  		if (rt_se && !on_rt_rq(rt_se))
>  			enqueue_rt_entity(rt_se);
> -		if (rt_rq->highest_prio < curr->prio)
> +		if (rt_rq->highest_prio.curr < curr->prio)
>  			resched_task(curr);
>  	}
>  }
> @@ -473,7 +473,7 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
>  	struct rt_rq *rt_rq = group_rt_rq(rt_se);
>  
>  	if (rt_rq)
> -		return rt_rq->highest_prio;
> +		return rt_rq->highest_prio.curr;
>  #endif
>  
>  	return rt_task_of(rt_se)->prio;
> @@ -547,6 +547,21 @@ static void update_curr_rt(struct rq *rq)
>  	}
>  }
>  
> +#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
> +
> +static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
> +
> +static inline int next_prio(struct rq *rq)
> +{
> +	struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
> +
> +	if (next && rt_prio(next->prio))
> +		return next->prio;
> +	else
> +		return MAX_RT_PRIO;
> +}
> +#endif
> +
>  static inline
>  void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  {
> @@ -560,14 +575,32 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  	WARN_ON(!rt_prio(prio));
>  	rt_rq->rt_nr_running++;
>  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
> -	if (prio < rt_rq->highest_prio) {
> +	if (prio < rt_rq->highest_prio.curr) {
>  
> -		rt_rq->highest_prio = prio;
> +		/*
> +		 * If the new task is higher in priority than anything on the
> +		 * run-queue, we have a new high that must be published to
> +		 * the world.  We also know that the previous high becomes
> +		 * our next-highest.
> +		 */
> +		rt_rq->highest_prio.next = rt_rq->highest_prio.curr;
> +		rt_rq->highest_prio.curr = prio;
>  #ifdef CONFIG_SMP
>  		if (rq->online)
>  			cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
>  #endif
> -	}
> +	} else if (prio == rt_rq->highest_prio.curr)
> +		/*
> +		 * If the next task is equal in priority to the highest on
> +		 * the run-queue, then we implicitly know that the next highest
> +		 * task cannot be any lower than current
> +		 */
> +		rt_rq->highest_prio.next = prio;
> +	else if (prio < rt_rq->highest_prio.next)
> +		/*
> +		 * Otherwise, we need to recompute next-highest
> +		 */
> +		rt_rq->highest_prio.next = next_prio(rq);
>  #endif
>  #ifdef CONFIG_SMP
>  	if (rt_se->nr_cpus_allowed > 1)
> @@ -591,7 +624,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  {
>  #ifdef CONFIG_SMP
>  	struct rq *rq = rq_of_rt_rq(rt_rq);
> -	int highest_prio = rt_rq->highest_prio;
> +	int highest_prio = rt_rq->highest_prio.curr;
>  #endif
>  
>  	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
> @@ -599,24 +632,32 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>  	rt_rq->rt_nr_running--;
>  #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
>  	if (rt_rq->rt_nr_running) {
> -		struct rt_prio_array *array;
> +		int prio = rt_se_prio(rt_se);
> +
> +		WARN_ON(prio < rt_rq->highest_prio.curr);
>  
> -		WARN_ON(rt_se_prio(rt_se) < rt_rq->highest_prio);
> -		if (rt_se_prio(rt_se) == rt_rq->highest_prio) {
> -			/* recalculate */
> -			array = &rt_rq->active;
> -			rt_rq->highest_prio =
> +		/*
> +		 * This may have been our highest or next-highest priority
> +		 * task and therefore we may have some recomputation to do
> +		 */
> +		if (prio == rt_rq->highest_prio.curr) {
> +			struct rt_prio_array *array = &rt_rq->active;
> +
> +			rt_rq->highest_prio.curr =
>  				sched_find_first_bit(array->bitmap);
> -		} /* otherwise leave rq->highest prio alone */
> +		}
> +
> +		if (prio == rt_rq->highest_prio.next)
>   

Crap.  Trying to fall asleep tonight, I realized this is a bug I think. 
Looks like I will need a v3

It should be "prio <= rt_rq->highest_prio.next" or we can miss updating
.next properly.

> +			rt_rq->highest_prio.next = next_prio(rq);
>  	} else
> -		rt_rq->highest_prio = MAX_RT_PRIO;
> +		rt_rq->highest_prio.curr = MAX_RT_PRIO;
>  #endif
>  #ifdef CONFIG_SMP
>  	if (rt_se->nr_cpus_allowed > 1)
>  		rq->rt.rt_nr_migratory--;
>  
> -	if (rq->online && rt_rq->highest_prio != highest_prio)
> -		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio);
> +	if (rq->online && rt_rq->highest_prio.curr != highest_prio)
> +		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
>  
>  	update_rt_migration(rq);
>  #endif /* CONFIG_SMP */
> @@ -1066,7 +1107,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
>  		}
>  
>  		/* If this rq is still suitable use it. */
> -		if (lowest_rq->rt.highest_prio > task->prio)
> +		if (lowest_rq->rt.highest_prio.curr > task->prio)
>  			break;
>  
>  		/* try again */
> @@ -1254,7 +1295,7 @@ static int pull_rt_task(struct rq *this_rq)
>  static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
>  {
>  	/* Try to pull RT tasks here if we lower this rq's prio */
> -	if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio)
> +	if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
>  		pull_rt_task(rq);
>  }
>  
> @@ -1340,7 +1381,7 @@ static void rq_online_rt(struct rq *rq)
>  
>  	__enable_runtime(rq);
>  
> -	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
> +	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
>  }
>  
>  /* Assumes rq->lock is held */
> @@ -1431,7 +1472,7 @@ static void prio_changed_rt(struct rq *rq, struct task_struct *p,
>  		 * can release the rq lock and p could migrate.
>  		 * Only reschedule if p is still on the same runqueue.
>  		 */
> -		if (p->prio > rq->rt.highest_prio && rq->curr == p)
> +		if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
>  			resched_task(p);
>  #else
>  		/* For UP simply resched on drop of prio */
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>   



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

  reply	other threads:[~2008-12-04  4:18 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20081203204025.GA31376@elte.hu>
2008-12-03 22:09 ` [PATCH v2 0/4] sched: track next-highest priority Gregory Haskins
2008-12-03 22:09   ` [PATCH v2 1/4] sched: cleanup inc/dec_rt_tasks Gregory Haskins
2008-12-03 22:09   ` [PATCH v2 2/4] sched: track the next-highest priority on each runqueue Gregory Haskins
2008-12-04  4:22     ` Gregory Haskins [this message]
2008-12-03 22:09   ` [PATCH v2 3/4] sched: use highest_prio.curr for pull threshold Gregory Haskins
2008-12-03 22:09   ` [PATCH v2 4/4] sched: use highest_prio.next to optimize pull operations Gregory Haskins
2008-12-04 15:43   ` [PATCH v3 0/4] sched: track next-highest priority Gregory Haskins
2008-12-04 15:43     ` [PATCH v3 1/4] sched: cleanup inc/dec_rt_tasks Gregory Haskins
2008-12-04 15:43     ` [PATCH v3 2/4] sched: track the next-highest priority on each runqueue Gregory Haskins
2008-12-04 15:43     ` [PATCH v3 3/4] sched: use highest_prio.curr for pull threshold Gregory Haskins
2008-12-04 15:43     ` [PATCH v3 4/4] sched: use highest_prio.next to optimize pull operations Gregory Haskins

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=49375AFF.2070307@novell.com \
    --to=ghaskins@novell.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).