From mboxrd@z Thu Jan 1 00:00:00 1970 From: Xiaotian Feng Subject: Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue Date: Wed, 28 Apr 2010 15:47:54 +0800 Message-ID: References: <1272430986-20436-1-git-send-email-xiaosuo@gmail.com> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: Ingo Molnar , Alexander Viro , Andrew Morton , "Eric W. Biederman" , Davide Libenzi , Roland Dreier , Stefan Richter , Peter Zijlstra , "David S. Miller" , Eric Dumazet , Christoph Lameter , Andreas Herrmann , Thomas Gleixner , David Howells , Takashi Iwai , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org To: Changli Gao Return-path: Received: from mail-qy0-f179.google.com ([209.85.221.179]:37029 "EHLO mail-qy0-f179.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751962Ab0D1Hr5 convert rfc822-to-8bit (ORCPT ); Wed, 28 Apr 2010 03:47:57 -0400 In-Reply-To: <1272430986-20436-1-git-send-email-xiaosuo@gmail.com> Sender: linux-fsdevel-owner@vger.kernel.org List-ID: On Wed, Apr 28, 2010 at 1:03 PM, Changli Gao wrote: > implement the exclusive wait queue as a LIFO queue > > If the exclusive wait queue is also a LIFO queue as the normal wait q= ueue, the > process who goes to sleep recently, will be woke up first. As its mem= ory is > more likely in cache, we will get better performance. And when there = are many > processes waiting on a exclusive wait queue, some of them may not be = woke up, > if the others can handle the workload, and it will reduce the load of > the scheduler. > Starve some processes for performance? > Note: before applying this patch, you need my previous patch patched = first. > https://patchwork.kernel.org/patch/95600/ > > Signed-off-by: Changli Gao > ---- > =C2=A0fs/eventpoll.c =C2=A0 =C2=A0 =C2=A0 | =C2=A0 =C2=A03 +-- > =C2=A0include/linux/wait.h | =C2=A0 17 +++++++---------- > =C2=A0kernel/sched.c =C2=A0 =C2=A0 =C2=A0 | =C2=A0 =C2=A08 ++++---- > =C2=A0kernel/wait.c =C2=A0 =C2=A0 =C2=A0 =C2=A0| =C2=A0 =C2=A09 +++--= ---- > =C2=A04 files changed, 15 insertions(+), 22 deletions(-) > diff --git a/fs/eventpoll.c b/fs/eventpoll.c > index bd056a5..e9b3ebe 100644 > --- a/fs/eventpoll.c > +++ b/fs/eventpoll.c > @@ -1140,8 +1140,7 @@ retry: > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 * ep_poll_cal= lback() when events will become available. > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 */ > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0init_waitqueue= _entry(&wait, current); > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 wait.flags |=3D WQ= _FLAG_EXCLUSIVE; > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 __add_wait_queue(&= ep->wq, &wait); > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 __add_wait_queue_e= x(&ep->wq, &wait); > > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0for (;;) { > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0/* > diff --git a/include/linux/wait.h b/include/linux/wait.h > index a48e16b..95c127d 100644 > --- a/include/linux/wait.h > +++ b/include/linux/wait.h > @@ -30,8 +30,6 @@ typedef int (*wait_queue_func_t)(wait_queue_t *wait= , unsigned mode, int flags, v > =C2=A0int default_wake_function(wait_queue_t *wait, unsigned mode, in= t flags, void *key); > > =C2=A0struct __wait_queue { > - =C2=A0 =C2=A0 =C2=A0 unsigned int flags; > -#define WQ_FLAG_EXCLUSIVE =C2=A0 =C2=A0 =C2=A00x01 > =C2=A0 =C2=A0 =C2=A0 =C2=A0void *private; > =C2=A0 =C2=A0 =C2=A0 =C2=A0wait_queue_func_t func; > =C2=A0 =C2=A0 =C2=A0 =C2=A0struct list_head task_list; > @@ -50,6 +48,7 @@ struct wait_bit_queue { > =C2=A0struct __wait_queue_head { > =C2=A0 =C2=A0 =C2=A0 =C2=A0spinlock_t lock; > =C2=A0 =C2=A0 =C2=A0 =C2=A0struct list_head task_list; > + =C2=A0 =C2=A0 =C2=A0 struct list_head task_list_ex; > =C2=A0}; > =C2=A0typedef struct __wait_queue_head wait_queue_head_t; > > @@ -69,7 +68,8 @@ struct task_struct; > > =C2=A0#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= \ > =C2=A0 =C2=A0 =C2=A0 =C2=A0.lock =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =3D= __SPIN_LOCK_UNLOCKED(name.lock), =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0\ > - =C2=A0 =C2=A0 =C2=A0 .task_list =C2=A0 =C2=A0 =C2=A0=3D { &(name).t= ask_list, &(name).task_list } } > + =C2=A0 =C2=A0 =C2=A0 .task_list =C2=A0 =C2=A0 =C2=A0=3D { &(name).t= ask_list, &(name).task_list }, =C2=A0 =C2=A0 \ > + =C2=A0 =C2=A0 =C2=A0 .task_list_ex =C2=A0 =3D { &(name).task_list_e= x, &(name).task_list_ex } } > > =C2=A0#define DECLARE_WAIT_QUEUE_HEAD(name) \ > =C2=A0 =C2=A0 =C2=A0 =C2=A0wait_queue_head_t name =3D __WAIT_QUEUE_HE= AD_INITIALIZER(name) > @@ -97,7 +97,6 @@ extern void __init_waitqueue_head(wait_queue_head_t= *q, struct lock_class_key *) > > =C2=A0static inline void init_waitqueue_entry(wait_queue_t *q, struct= task_struct *p) > =C2=A0{ > - =C2=A0 =C2=A0 =C2=A0 q->flags =3D 0; > =C2=A0 =C2=A0 =C2=A0 =C2=A0q->private =3D p; > =C2=A0 =C2=A0 =C2=A0 =C2=A0q->func =3D default_wake_function; > =C2=A0} > @@ -105,14 +104,13 @@ static inline void init_waitqueue_entry(wait_qu= eue_t *q, struct task_struct *p) > =C2=A0static inline void init_waitqueue_func_entry(wait_queue_t *q, > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0w= ait_queue_func_t func) > =C2=A0{ > - =C2=A0 =C2=A0 =C2=A0 q->flags =3D 0; > =C2=A0 =C2=A0 =C2=A0 =C2=A0q->private =3D NULL; > =C2=A0 =C2=A0 =C2=A0 =C2=A0q->func =3D func; > =C2=A0} > > =C2=A0static inline int waitqueue_active(wait_queue_head_t *q) > =C2=A0{ > - =C2=A0 =C2=A0 =C2=A0 return !list_empty(&q->task_list); > + =C2=A0 =C2=A0 =C2=A0 return !list_empty(&q->task_list) || !list_emp= ty(&q->task_list); > =C2=A0} > > =C2=A0extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *= wait); > @@ -127,10 +125,10 @@ static inline void __add_wait_queue(wait_queue_= head_t *head, wait_queue_t *new) > =C2=A0/* > =C2=A0* Used for wake-one threads: > =C2=A0*/ > -static inline void __add_wait_queue_tail(wait_queue_head_t *head, > +static inline void __add_wait_queue_ex(wait_queue_head_t *head, > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0wait_queue_t *new) > =C2=A0{ > - =C2=A0 =C2=A0 =C2=A0 list_add_tail(&new->task_list, &head->task_lis= t); > + =C2=A0 =C2=A0 =C2=A0 list_add(&new->task_list, &head->task_list_ex)= ; > =C2=A0} > > =C2=A0static inline void __remove_wait_queue(wait_queue_head_t *head, > @@ -409,8 +407,7 @@ do { =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0\ > =C2=A0static inline void add_wait_queue_exclusive_locked(wait_queue_h= ead_t *q, > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 = =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 wait_queue_t * wait) > =C2=A0{ > - =C2=A0 =C2=A0 =C2=A0 wait->flags |=3D WQ_FLAG_EXCLUSIVE; > - =C2=A0 =C2=A0 =C2=A0 __add_wait_queue_tail(q, =C2=A0wait); > + =C2=A0 =C2=A0 =C2=A0 __add_wait_queue_ex(q, =C2=A0wait); > =C2=A0} > > =C2=A0/* > diff --git a/kernel/sched.c b/kernel/sched.c > index be5ab70..59b1534 100644 > --- a/kernel/sched.c > +++ b/kernel/sched.c > @@ -3903,11 +3903,11 @@ static void __wake_up_common(wait_queue_head_= t *q, unsigned int mode, > =C2=A0{ > =C2=A0 =C2=A0 =C2=A0 =C2=A0wait_queue_t *curr, *next; > > - =C2=A0 =C2=A0 =C2=A0 list_for_each_entry_safe(curr, next, &q->task_= list, task_list) { > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 unsigned flags =3D= curr->flags; > + =C2=A0 =C2=A0 =C2=A0 list_for_each_entry_safe(curr, next, &q->task_= list, task_list) > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 curr->func(curr, m= ode, wake_flags, key); > > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 if (curr->func(cur= r, mode, wake_flags, key) && > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 (flags & WQ_FLAG_EXCLUSIVE) && !--n= r_exclusive) > + =C2=A0 =C2=A0 =C2=A0 list_for_each_entry_safe(curr, next, &q->task_= list_ex, task_list) { > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 if (curr->func(cur= r, mode, wake_flags, key) && !--nr_exclusive) > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0break; > =C2=A0 =C2=A0 =C2=A0 =C2=A0} > =C2=A0} > diff --git a/kernel/wait.c b/kernel/wait.c > index c4bd3d8..a0559df 100644 > --- a/kernel/wait.c > +++ b/kernel/wait.c > @@ -15,6 +15,7 @@ void __init_waitqueue_head(wait_queue_head_t *q, st= ruct lock_class_key *key) > =C2=A0 =C2=A0 =C2=A0 =C2=A0spin_lock_init(&q->lock); > =C2=A0 =C2=A0 =C2=A0 =C2=A0lockdep_set_class(&q->lock, key); > =C2=A0 =C2=A0 =C2=A0 =C2=A0INIT_LIST_HEAD(&q->task_list); > + =C2=A0 =C2=A0 =C2=A0 INIT_LIST_HEAD(&q->task_list_ex); > =C2=A0} > > =C2=A0EXPORT_SYMBOL(__init_waitqueue_head); > @@ -23,7 +24,6 @@ void add_wait_queue(wait_queue_head_t *q, wait_queu= e_t *wait) > =C2=A0{ > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned long flags; > > - =C2=A0 =C2=A0 =C2=A0 wait->flags &=3D ~WQ_FLAG_EXCLUSIVE; > =C2=A0 =C2=A0 =C2=A0 =C2=A0spin_lock_irqsave(&q->lock, flags); > =C2=A0 =C2=A0 =C2=A0 =C2=A0__add_wait_queue(q, wait); > =C2=A0 =C2=A0 =C2=A0 =C2=A0spin_unlock_irqrestore(&q->lock, flags); > @@ -34,9 +34,8 @@ void add_wait_queue_exclusive(wait_queue_head_t *q,= wait_queue_t *wait) > =C2=A0{ > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned long flags; > > - =C2=A0 =C2=A0 =C2=A0 wait->flags |=3D WQ_FLAG_EXCLUSIVE; > =C2=A0 =C2=A0 =C2=A0 =C2=A0spin_lock_irqsave(&q->lock, flags); > - =C2=A0 =C2=A0 =C2=A0 __add_wait_queue_tail(q, wait); > + =C2=A0 =C2=A0 =C2=A0 __add_wait_queue_ex(q, wait); > =C2=A0 =C2=A0 =C2=A0 =C2=A0spin_unlock_irqrestore(&q->lock, flags); > =C2=A0} > =C2=A0EXPORT_SYMBOL(add_wait_queue_exclusive); > @@ -69,7 +68,6 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t = *wait, int state) > =C2=A0{ > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned long flags; > > - =C2=A0 =C2=A0 =C2=A0 wait->flags &=3D ~WQ_FLAG_EXCLUSIVE; > =C2=A0 =C2=A0 =C2=A0 =C2=A0spin_lock_irqsave(&q->lock, flags); > =C2=A0 =C2=A0 =C2=A0 =C2=A0if (list_empty(&wait->task_list)) > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0__add_wait_que= ue(q, wait); > @@ -83,10 +81,9 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wa= it_queue_t *wait, int state) > =C2=A0{ > =C2=A0 =C2=A0 =C2=A0 =C2=A0unsigned long flags; > > - =C2=A0 =C2=A0 =C2=A0 wait->flags |=3D WQ_FLAG_EXCLUSIVE; > =C2=A0 =C2=A0 =C2=A0 =C2=A0spin_lock_irqsave(&q->lock, flags); > =C2=A0 =C2=A0 =C2=A0 =C2=A0if (list_empty(&wait->task_list)) > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 __add_wait_queue_t= ail(q, wait); > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 __add_wait_queue_e= x(q, wait); > =C2=A0 =C2=A0 =C2=A0 =C2=A0set_current_state(state); > =C2=A0 =C2=A0 =C2=A0 =C2=A0spin_unlock_irqrestore(&q->lock, flags); > =C2=A0} > -- > To unsubscribe from this list: send the line "unsubscribe linux-kerne= l" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at =C2=A0http://vger.kernel.org/majordomo-info.ht= ml > Please read the FAQ at =C2=A0http://www.tux.org/lkml/ > -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel= " in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html