* [RFC] sched: implement the exclusive wait queue as a LIFO queue
@ 2010-04-28 5:03 Changli Gao
2010-04-28 6:22 ` Changli Gao
` (3 more replies)
0 siblings, 4 replies; 24+ messages in thread
From: Changli Gao @ 2010-04-28 5:03 UTC (permalink / raw)
To: Ingo Molnar
Cc: Alexander Viro, Andrew Morton, Eric W. Biederman, Davide Libenzi,
Roland Dreier, Stefan Richter, Peter Zijlstra, David S. Miller,
Eric Dumazet, Christoph Lameter, Andreas Herrmann,
Thomas Gleixner, David Howells, Takashi Iwai, linux-fsdevel,
linux-kernel, Changli Gao
implement the exclusive wait queue as a LIFO queue
If the exclusive wait queue is also a LIFO queue like the normal wait queue, the
process which went to sleep most recently will be woken up first. As its memory is
more likely to be in cache, we will get better performance. And when there are many
processes waiting on an exclusive wait queue, some of them may not be woken up,
if the others can handle the workload, and this will reduce the load on
the scheduler.
Note: before applying this patch, you need my previous patch applied first.
https://patchwork.kernel.org/patch/95600/
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
fs/eventpoll.c | 3 +--
include/linux/wait.h | 17 +++++++----------
kernel/sched.c | 8 ++++----
kernel/wait.c | 9 +++------
4 files changed, 15 insertions(+), 22 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index bd056a5..e9b3ebe 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1140,8 +1140,7 @@ retry:
* ep_poll_callback() when events will become available.
*/
init_waitqueue_entry(&wait, current);
- wait.flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue(&ep->wq, &wait);
+ __add_wait_queue_ex(&ep->wq, &wait);
for (;;) {
/*
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a48e16b..95c127d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -30,8 +30,6 @@ typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, v
int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
struct __wait_queue {
- unsigned int flags;
-#define WQ_FLAG_EXCLUSIVE 0x01
void *private;
wait_queue_func_t func;
struct list_head task_list;
@@ -50,6 +48,7 @@ struct wait_bit_queue {
struct __wait_queue_head {
spinlock_t lock;
struct list_head task_list;
+ struct list_head task_list_ex;
};
typedef struct __wait_queue_head wait_queue_head_t;
@@ -69,7 +68,8 @@ struct task_struct;
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
- .task_list = { &(name).task_list, &(name).task_list } }
+ .task_list = { &(name).task_list, &(name).task_list }, \
+ .task_list_ex = { &(name).task_list_ex, &(name).task_list_ex } }
#define DECLARE_WAIT_QUEUE_HEAD(name) \
wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
@@ -97,7 +97,6 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *)
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
- q->flags = 0;
q->private = p;
q->func = default_wake_function;
}
@@ -105,14 +104,13 @@ static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
static inline void init_waitqueue_func_entry(wait_queue_t *q,
wait_queue_func_t func)
{
- q->flags = 0;
q->private = NULL;
q->func = func;
}
static inline int waitqueue_active(wait_queue_head_t *q)
{
- return !list_empty(&q->task_list);
+ return !list_empty(&q->task_list) || !list_empty(&q->task_list);
}
extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
@@ -127,10 +125,10 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
/*
* Used for wake-one threads:
*/
-static inline void __add_wait_queue_tail(wait_queue_head_t *head,
+static inline void __add_wait_queue_ex(wait_queue_head_t *head,
wait_queue_t *new)
{
- list_add_tail(&new->task_list, &head->task_list);
+ list_add(&new->task_list, &head->task_list_ex);
}
static inline void __remove_wait_queue(wait_queue_head_t *head,
@@ -409,8 +407,7 @@ do { \
static inline void add_wait_queue_exclusive_locked(wait_queue_head_t *q,
wait_queue_t * wait)
{
- wait->flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue_tail(q, wait);
+ __add_wait_queue_ex(q, wait);
}
/*
diff --git a/kernel/sched.c b/kernel/sched.c
index be5ab70..59b1534 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3903,11 +3903,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
{
wait_queue_t *curr, *next;
- list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
- unsigned flags = curr->flags;
+ list_for_each_entry_safe(curr, next, &q->task_list, task_list)
+ curr->func(curr, mode, wake_flags, key);
- if (curr->func(curr, mode, wake_flags, key) &&
- (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+ list_for_each_entry_safe(curr, next, &q->task_list_ex, task_list) {
+ if (curr->func(curr, mode, wake_flags, key) && !--nr_exclusive)
break;
}
}
diff --git a/kernel/wait.c b/kernel/wait.c
index c4bd3d8..a0559df 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -15,6 +15,7 @@ void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
spin_lock_init(&q->lock);
lockdep_set_class(&q->lock, key);
INIT_LIST_HEAD(&q->task_list);
+ INIT_LIST_HEAD(&q->task_list_ex);
}
EXPORT_SYMBOL(__init_waitqueue_head);
@@ -23,7 +24,6 @@ void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
- wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
@@ -34,9 +34,8 @@ void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
- wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
- __add_wait_queue_tail(q, wait);
+ __add_wait_queue_ex(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);
@@ -69,7 +68,6 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
- wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait);
@@ -83,10 +81,9 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
- wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
- __add_wait_queue_tail(q, wait);
+ __add_wait_queue_ex(q, wait);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 5:03 [RFC] sched: implement the exclusive wait queue as a LIFO queue Changli Gao
@ 2010-04-28 6:22 ` Changli Gao
2010-04-28 8:05 ` Changli Gao
2010-04-28 7:47 ` Xiaotian Feng
` (2 subsequent siblings)
3 siblings, 1 reply; 24+ messages in thread
From: Changli Gao @ 2010-04-28 6:22 UTC (permalink / raw)
To: Ingo Molnar
Cc: Alexander Viro, Andrew Morton, Eric W. Biederman, Davide Libenzi,
Roland Dreier, Stefan Richter, Peter Zijlstra, David S. Miller,
Eric Dumazet, Christoph Lameter, Andreas Herrmann,
Thomas Gleixner, David Howells, Takashi Iwai, linux-fsdevel,
linux-kernel, Changli Gao
[-- Attachment #1: Type: text/plain, Size: 489 bytes --]
On Wed, Apr 28, 2010 at 1:03 PM, Changli Gao <xiaosuo@gmail.com> wrote:
>
> static inline int waitqueue_active(wait_queue_head_t *q)
> {
> - return !list_empty(&q->task_list);
> + return !list_empty(&q->task_list) || !list_empty(&q->task_list);
> }
>
I am sorry. The latter task_list should be task_list_ex. The updated
patch is attached. And I will replace the task_list list with an hlist to
save space.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
[-- Attachment #2: wq-ex-lifo.diff --]
[-- Type: text/plain, Size: 5331 bytes --]
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index bd056a5..e9b3ebe 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1140,8 +1140,7 @@ retry:
* ep_poll_callback() when events will become available.
*/
init_waitqueue_entry(&wait, current);
- wait.flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue(&ep->wq, &wait);
+ __add_wait_queue_ex(&ep->wq, &wait);
for (;;) {
/*
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a48e16b..839db33 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -30,8 +30,6 @@ typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, v
int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
struct __wait_queue {
- unsigned int flags;
-#define WQ_FLAG_EXCLUSIVE 0x01
void *private;
wait_queue_func_t func;
struct list_head task_list;
@@ -50,6 +48,7 @@ struct wait_bit_queue {
struct __wait_queue_head {
spinlock_t lock;
struct list_head task_list;
+ struct list_head task_list_ex;
};
typedef struct __wait_queue_head wait_queue_head_t;
@@ -69,7 +68,8 @@ struct task_struct;
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
- .task_list = { &(name).task_list, &(name).task_list } }
+ .task_list = { &(name).task_list, &(name).task_list }, \
+ .task_list_ex = { &(name).task_list_ex, &(name).task_list_ex } }
#define DECLARE_WAIT_QUEUE_HEAD(name) \
wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
@@ -97,7 +97,6 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *)
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
- q->flags = 0;
q->private = p;
q->func = default_wake_function;
}
@@ -105,14 +104,13 @@ static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
static inline void init_waitqueue_func_entry(wait_queue_t *q,
wait_queue_func_t func)
{
- q->flags = 0;
q->private = NULL;
q->func = func;
}
static inline int waitqueue_active(wait_queue_head_t *q)
{
- return !list_empty(&q->task_list);
+ return !list_empty(&q->task_list) || !list_empty(&q->task_list_ex);
}
extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
@@ -127,10 +125,10 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
/*
* Used for wake-one threads:
*/
-static inline void __add_wait_queue_tail(wait_queue_head_t *head,
+static inline void __add_wait_queue_ex(wait_queue_head_t *head,
wait_queue_t *new)
{
- list_add_tail(&new->task_list, &head->task_list);
+ list_add(&new->task_list, &head->task_list_ex);
}
static inline void __remove_wait_queue(wait_queue_head_t *head,
@@ -409,8 +407,7 @@ do { \
static inline void add_wait_queue_exclusive_locked(wait_queue_head_t *q,
wait_queue_t * wait)
{
- wait->flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue_tail(q, wait);
+ __add_wait_queue_ex(q, wait);
}
/*
diff --git a/kernel/sched.c b/kernel/sched.c
index be5ab70..59b1534 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3903,11 +3903,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
{
wait_queue_t *curr, *next;
- list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
- unsigned flags = curr->flags;
+ list_for_each_entry_safe(curr, next, &q->task_list, task_list)
+ curr->func(curr, mode, wake_flags, key);
- if (curr->func(curr, mode, wake_flags, key) &&
- (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+ list_for_each_entry_safe(curr, next, &q->task_list_ex, task_list) {
+ if (curr->func(curr, mode, wake_flags, key) && !--nr_exclusive)
break;
}
}
diff --git a/kernel/wait.c b/kernel/wait.c
index c4bd3d8..a0559df 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -15,6 +15,7 @@ void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
spin_lock_init(&q->lock);
lockdep_set_class(&q->lock, key);
INIT_LIST_HEAD(&q->task_list);
+ INIT_LIST_HEAD(&q->task_list_ex);
}
EXPORT_SYMBOL(__init_waitqueue_head);
@@ -23,7 +24,6 @@ void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
- wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
@@ -34,9 +34,8 @@ void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
- wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
- __add_wait_queue_tail(q, wait);
+ __add_wait_queue_ex(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);
@@ -69,7 +68,6 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
- wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait);
@@ -83,10 +81,9 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
- wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
- __add_wait_queue_tail(q, wait);
+ __add_wait_queue_ex(q, wait);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 5:03 [RFC] sched: implement the exclusive wait queue as a LIFO queue Changli Gao
2010-04-28 6:22 ` Changli Gao
@ 2010-04-28 7:47 ` Xiaotian Feng
2010-04-28 7:52 ` Changli Gao
2010-04-28 9:32 ` David Howells
2010-04-28 9:34 ` David Howells
3 siblings, 1 reply; 24+ messages in thread
From: Xiaotian Feng @ 2010-04-28 7:47 UTC (permalink / raw)
To: Changli Gao
Cc: Ingo Molnar, Alexander Viro, Andrew Morton, Eric W. Biederman,
Davide Libenzi, Roland Dreier, Stefan Richter, Peter Zijlstra,
David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, David Howells, Takashi Iwai,
linux-fsdevel, linux-kernel
On Wed, Apr 28, 2010 at 1:03 PM, Changli Gao <xiaosuo@gmail.com> wrote:
> implement the exclusive wait queue as a LIFO queue
>
> If the exclusive wait queue is also a LIFO queue as the normal wait queue, the
> process who goes to sleep recently, will be woke up first. As its memory is
> more likely in cache, we will get better performance. And when there are many
> processes waiting on a exclusive wait queue, some of them may not be woke up,
> if the others can handle the workload, and it will reduce the load of
> the scheduler.
>
Starve some processes for performance?
> Note: before applying this patch, you need my previous patch patched first.
> https://patchwork.kernel.org/patch/95600/
>
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ----
> fs/eventpoll.c | 3 +--
> include/linux/wait.h | 17 +++++++----------
> kernel/sched.c | 8 ++++----
> kernel/wait.c | 9 +++------
> 4 files changed, 15 insertions(+), 22 deletions(-)
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index bd056a5..e9b3ebe 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -1140,8 +1140,7 @@ retry:
> * ep_poll_callback() when events will become available.
> */
> init_waitqueue_entry(&wait, current);
> - wait.flags |= WQ_FLAG_EXCLUSIVE;
> - __add_wait_queue(&ep->wq, &wait);
> + __add_wait_queue_ex(&ep->wq, &wait);
>
> for (;;) {
> /*
> diff --git a/include/linux/wait.h b/include/linux/wait.h
> index a48e16b..95c127d 100644
> --- a/include/linux/wait.h
> +++ b/include/linux/wait.h
> @@ -30,8 +30,6 @@ typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, v
> int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
>
> struct __wait_queue {
> - unsigned int flags;
> -#define WQ_FLAG_EXCLUSIVE 0x01
> void *private;
> wait_queue_func_t func;
> struct list_head task_list;
> @@ -50,6 +48,7 @@ struct wait_bit_queue {
> struct __wait_queue_head {
> spinlock_t lock;
> struct list_head task_list;
> + struct list_head task_list_ex;
> };
> typedef struct __wait_queue_head wait_queue_head_t;
>
> @@ -69,7 +68,8 @@ struct task_struct;
>
> #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
> .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
> - .task_list = { &(name).task_list, &(name).task_list } }
> + .task_list = { &(name).task_list, &(name).task_list }, \
> + .task_list_ex = { &(name).task_list_ex, &(name).task_list_ex } }
>
> #define DECLARE_WAIT_QUEUE_HEAD(name) \
> wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
> @@ -97,7 +97,6 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *)
>
> static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
> {
> - q->flags = 0;
> q->private = p;
> q->func = default_wake_function;
> }
> @@ -105,14 +104,13 @@ static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
> static inline void init_waitqueue_func_entry(wait_queue_t *q,
> wait_queue_func_t func)
> {
> - q->flags = 0;
> q->private = NULL;
> q->func = func;
> }
>
> static inline int waitqueue_active(wait_queue_head_t *q)
> {
> - return !list_empty(&q->task_list);
> + return !list_empty(&q->task_list) || !list_empty(&q->task_list);
> }
>
> extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
> @@ -127,10 +125,10 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
> /*
> * Used for wake-one threads:
> */
> -static inline void __add_wait_queue_tail(wait_queue_head_t *head,
> +static inline void __add_wait_queue_ex(wait_queue_head_t *head,
> wait_queue_t *new)
> {
> - list_add_tail(&new->task_list, &head->task_list);
> + list_add(&new->task_list, &head->task_list_ex);
> }
>
> static inline void __remove_wait_queue(wait_queue_head_t *head,
> @@ -409,8 +407,7 @@ do { \
> static inline void add_wait_queue_exclusive_locked(wait_queue_head_t *q,
> wait_queue_t * wait)
> {
> - wait->flags |= WQ_FLAG_EXCLUSIVE;
> - __add_wait_queue_tail(q, wait);
> + __add_wait_queue_ex(q, wait);
> }
>
> /*
> diff --git a/kernel/sched.c b/kernel/sched.c
> index be5ab70..59b1534 100644
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -3903,11 +3903,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
> {
> wait_queue_t *curr, *next;
>
> - list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
> - unsigned flags = curr->flags;
> + list_for_each_entry_safe(curr, next, &q->task_list, task_list)
> + curr->func(curr, mode, wake_flags, key);
>
> - if (curr->func(curr, mode, wake_flags, key) &&
> - (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
> + list_for_each_entry_safe(curr, next, &q->task_list_ex, task_list) {
> + if (curr->func(curr, mode, wake_flags, key) && !--nr_exclusive)
> break;
> }
> }
> diff --git a/kernel/wait.c b/kernel/wait.c
> index c4bd3d8..a0559df 100644
> --- a/kernel/wait.c
> +++ b/kernel/wait.c
> @@ -15,6 +15,7 @@ void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
> spin_lock_init(&q->lock);
> lockdep_set_class(&q->lock, key);
> INIT_LIST_HEAD(&q->task_list);
> + INIT_LIST_HEAD(&q->task_list_ex);
> }
>
> EXPORT_SYMBOL(__init_waitqueue_head);
> @@ -23,7 +24,6 @@ void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
> {
> unsigned long flags;
>
> - wait->flags &= ~WQ_FLAG_EXCLUSIVE;
> spin_lock_irqsave(&q->lock, flags);
> __add_wait_queue(q, wait);
> spin_unlock_irqrestore(&q->lock, flags);
> @@ -34,9 +34,8 @@ void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
> {
> unsigned long flags;
>
> - wait->flags |= WQ_FLAG_EXCLUSIVE;
> spin_lock_irqsave(&q->lock, flags);
> - __add_wait_queue_tail(q, wait);
> + __add_wait_queue_ex(q, wait);
> spin_unlock_irqrestore(&q->lock, flags);
> }
> EXPORT_SYMBOL(add_wait_queue_exclusive);
> @@ -69,7 +68,6 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
> {
> unsigned long flags;
>
> - wait->flags &= ~WQ_FLAG_EXCLUSIVE;
> spin_lock_irqsave(&q->lock, flags);
> if (list_empty(&wait->task_list))
> __add_wait_queue(q, wait);
> @@ -83,10 +81,9 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
> {
> unsigned long flags;
>
> - wait->flags |= WQ_FLAG_EXCLUSIVE;
> spin_lock_irqsave(&q->lock, flags);
> if (list_empty(&wait->task_list))
> - __add_wait_queue_tail(q, wait);
> + __add_wait_queue_ex(q, wait);
> set_current_state(state);
> spin_unlock_irqrestore(&q->lock, flags);
> }
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 7:47 ` Xiaotian Feng
@ 2010-04-28 7:52 ` Changli Gao
2010-04-28 8:15 ` Yong Zhang
0 siblings, 1 reply; 24+ messages in thread
From: Changli Gao @ 2010-04-28 7:52 UTC (permalink / raw)
To: Xiaotian Feng
Cc: Ingo Molnar, Alexander Viro, Andrew Morton, Eric W. Biederman,
Davide Libenzi, Roland Dreier, Stefan Richter, Peter Zijlstra,
David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, David Howells, Takashi Iwai,
linux-fsdevel, linux-kernel
On Wed, Apr 28, 2010 at 3:47 PM, Xiaotian Feng <xtfeng@gmail.com> wrote:
> On Wed, Apr 28, 2010 at 1:03 PM, Changli Gao <xiaosuo@gmail.com> wrote:
>> implement the exclusive wait queue as a LIFO queue
>>
>> If the exclusive wait queue is also a LIFO queue as the normal wait queue, the
>> process who goes to sleep recently, will be woke up first. As its memory is
>> more likely in cache, we will get better performance. And when there are many
>> processes waiting on a exclusive wait queue, some of them may not be woke up,
>> if the others can handle the workload, and it will reduce the load of
>> the scheduler.
>>
>
> Starve some processes for performance?
>
Starve? Oh, no. If we don't need these processes, and we can do better
without them, why should we wake them up?
--
Regards,
Changli Gao(xiaosuo@gmail.com)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 6:22 ` Changli Gao
@ 2010-04-28 8:05 ` Changli Gao
0 siblings, 0 replies; 24+ messages in thread
From: Changli Gao @ 2010-04-28 8:05 UTC (permalink / raw)
To: Ingo Molnar
Cc: Alexander Viro, Andrew Morton, Eric W. Biederman, Davide Libenzi,
Roland Dreier, Stefan Richter, Peter Zijlstra, David S. Miller,
Eric Dumazet, Christoph Lameter, Andreas Herrmann,
Thomas Gleixner, David Howells, Takashi Iwai, linux-fsdevel,
linux-kernel, Changli Gao
[-- Attachment #1: Type: text/plain, Size: 333 bytes --]
On Wed, Apr 28, 2010 at 2:22 PM, Changli Gao <xiaosuo@gmail.com> wrote:
>
> I am sorry. the later task_list should be task_list_ex. The updated
> patch is attached. And I will replace task_list list with hlist for
> saving space.
>
This version replaces list with hlist.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
[-- Attachment #2: wq-ex-lifo.diff --]
[-- Type: text/plain, Size: 8628 bytes --]
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index bd056a5..e9b3ebe 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1140,8 +1140,7 @@ retry:
* ep_poll_callback() when events will become available.
*/
init_waitqueue_entry(&wait, current);
- wait.flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue(&ep->wq, &wait);
+ __add_wait_queue_ex(&ep->wq, &wait);
for (;;) {
/*
diff --git a/include/linux/list.h b/include/linux/list.h
index 8392884..3622d90 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -573,6 +573,11 @@ static inline int hlist_unhashed(const struct hlist_node *h)
return !h->pprev;
}
+static inline int hlist_unhashed_careful(const struct hlist_node *h)
+{
+ return !h->pprev && !h->next;
+}
+
static inline int hlist_empty(const struct hlist_head *h)
{
return !h->first;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a48e16b..5f42123 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -30,11 +30,9 @@ typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, v
int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
struct __wait_queue {
- unsigned int flags;
-#define WQ_FLAG_EXCLUSIVE 0x01
void *private;
wait_queue_func_t func;
- struct list_head task_list;
+ struct hlist_node task_list;
};
struct wait_bit_key {
@@ -49,7 +47,8 @@ struct wait_bit_queue {
struct __wait_queue_head {
spinlock_t lock;
- struct list_head task_list;
+ struct hlist_head task_list;
+ struct hlist_head task_list_ex;
};
typedef struct __wait_queue_head wait_queue_head_t;
@@ -69,7 +68,8 @@ struct task_struct;
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
- .task_list = { &(name).task_list, &(name).task_list } }
+ .task_list = HLIST_HEAD_INIT, \
+ .task_list_ex = HLIST_HEAD_INIT }
#define DECLARE_WAIT_QUEUE_HEAD(name) \
wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
@@ -97,7 +97,6 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *)
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
- q->flags = 0;
q->private = p;
q->func = default_wake_function;
}
@@ -105,14 +104,13 @@ static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
static inline void init_waitqueue_func_entry(wait_queue_t *q,
wait_queue_func_t func)
{
- q->flags = 0;
q->private = NULL;
q->func = func;
}
static inline int waitqueue_active(wait_queue_head_t *q)
{
- return !list_empty(&q->task_list);
+ return !hlist_empty(&q->task_list) || !hlist_empty(&q->task_list_ex);
}
extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
@@ -121,22 +119,22 @@ extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
{
- list_add(&new->task_list, &head->task_list);
+ hlist_add_head(&new->task_list, &head->task_list);
}
/*
* Used for wake-one threads:
*/
-static inline void __add_wait_queue_tail(wait_queue_head_t *head,
+static inline void __add_wait_queue_ex(wait_queue_head_t *head,
wait_queue_t *new)
{
- list_add_tail(&new->task_list, &head->task_list);
+ hlist_add_head(&new->task_list, &head->task_list_ex);
}
static inline void __remove_wait_queue(wait_queue_head_t *head,
wait_queue_t *old)
{
- list_del(&old->task_list);
+ hlist_del(&old->task_list);
}
void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
@@ -409,8 +407,7 @@ do { \
static inline void add_wait_queue_exclusive_locked(wait_queue_head_t *q,
wait_queue_t * wait)
{
- wait->flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue_tail(q, wait);
+ __add_wait_queue_ex(q, wait);
}
/*
@@ -449,7 +446,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
wait_queue_t name = { \
.private = current, \
.func = function, \
- .task_list = LIST_HEAD_INIT((name).task_list), \
+ .task_list = { NULL, NULL }, \
}
#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
@@ -460,8 +457,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
.wait = { \
.private = current, \
.func = wake_bit_function, \
- .task_list = \
- LIST_HEAD_INIT((name).wait.task_list), \
+ .task_list = { NULL, NULL }, \
}, \
}
@@ -469,7 +465,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
do { \
(wait)->private = current; \
(wait)->func = autoremove_wake_function; \
- INIT_LIST_HEAD(&(wait)->task_list); \
+ INIT_HLIST_NODE(&(wait)->task_list); \
} while (0)
/**
diff --git a/kernel/sched.c b/kernel/sched.c
index be5ab70..813fd44 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3901,13 +3901,14 @@ EXPORT_SYMBOL(default_wake_function);
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
- wait_queue_t *curr, *next;
+ wait_queue_t *curr;
+ struct hlist_node *pos, *n;
- list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
- unsigned flags = curr->flags;
+ hlist_for_each_entry_safe(curr, pos, n, &q->task_list, task_list)
+ curr->func(curr, mode, wake_flags, key);
- if (curr->func(curr, mode, wake_flags, key) &&
- (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+ hlist_for_each_entry_safe(curr, pos, n, &q->task_list_ex, task_list) {
+ if (curr->func(curr, mode, wake_flags, key) && !--nr_exclusive)
break;
}
}
diff --git a/kernel/wait.c b/kernel/wait.c
index c4bd3d8..b4278f0 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -14,7 +14,8 @@ void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
{
spin_lock_init(&q->lock);
lockdep_set_class(&q->lock, key);
- INIT_LIST_HEAD(&q->task_list);
+ INIT_HLIST_HEAD(&q->task_list);
+ INIT_HLIST_HEAD(&q->task_list_ex);
}
EXPORT_SYMBOL(__init_waitqueue_head);
@@ -23,7 +24,6 @@ void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
- wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
@@ -34,9 +34,8 @@ void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
- wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
- __add_wait_queue_tail(q, wait);
+ __add_wait_queue_ex(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);
@@ -69,9 +68,8 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
- wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
- if (list_empty(&wait->task_list))
+ if (hlist_unhashed(&wait->task_list))
__add_wait_queue(q, wait);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
@@ -83,10 +81,9 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
- wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
- if (list_empty(&wait->task_list))
- __add_wait_queue_tail(q, wait);
+ if (hlist_unhashed(&wait->task_list))
+ __add_wait_queue_ex(q, wait);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
@@ -119,9 +116,9 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
* have _one_ other CPU that looks at or modifies
* the list).
*/
- if (!list_empty_careful(&wait->task_list)) {
+ if (!hlist_unhashed_careful(&wait->task_list)) {
spin_lock_irqsave(&q->lock, flags);
- list_del_init(&wait->task_list);
+ hlist_del_init(&wait->task_list);
spin_unlock_irqrestore(&q->lock, flags);
}
}
@@ -152,8 +149,8 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
__set_current_state(TASK_RUNNING);
spin_lock_irqsave(&q->lock, flags);
- if (!list_empty(&wait->task_list))
- list_del_init(&wait->task_list);
+ if (!hlist_unhashed(&wait->task_list))
+ hlist_del_init(&wait->task_list);
else if (waitqueue_active(q))
__wake_up_locked_key(q, mode, key);
spin_unlock_irqrestore(&q->lock, flags);
@@ -165,7 +162,7 @@ int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *
int ret = default_wake_function(wait, mode, sync, key);
if (ret)
- list_del_init(&wait->task_list);
+ hlist_del_init(&wait->task_list);
return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 7:52 ` Changli Gao
@ 2010-04-28 8:15 ` Yong Zhang
2010-04-28 8:23 ` Changli Gao
2010-04-28 9:29 ` David Howells
0 siblings, 2 replies; 24+ messages in thread
From: Yong Zhang @ 2010-04-28 8:15 UTC (permalink / raw)
To: Changli Gao
Cc: Xiaotian Feng, Ingo Molnar, Alexander Viro, Andrew Morton,
Eric W. Biederman, Davide Libenzi, Roland Dreier, Stefan Richter,
Peter Zijlstra, David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, David Howells, Takashi Iwai,
linux-fsdevel, linux-kernel
On Wed, Apr 28, 2010 at 03:52:01PM +0800, Changli Gao wrote:
> On Wed, Apr 28, 2010 at 3:47 PM, Xiaotian Feng <xtfeng@gmail.com> wrote:
> > On Wed, Apr 28, 2010 at 1:03 PM, Changli Gao <xiaosuo@gmail.com> wrote:
> >> implement the exclusive wait queue as a LIFO queue
> >>
> >> If the exclusive wait queue is also a LIFO queue, like the normal wait queue, the
> >> process that went to sleep most recently will be woken up first. As its memory is
> >> more likely in cache, we will get better performance. And when there are many
> >> processes waiting on an exclusive wait queue, some of them may not be woken up,
> >> if the others can handle the workload, and it will reduce the load of
> >> the scheduler.
> >>
> >
> > Starve some processes for performance?
> >
>
> Starve? Oh, No. If we don't need these processes, and we can do better
What do you mean "we don't need these processes"?
> without them, why we wake them up?
So some processes (at the tail of the exclusive list) will be treated abnormally
and will sleep for a long time; is this reasonable?
>
>
> --
> Regards,
> Changli Gao(xiaosuo@gmail.com)
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 8:15 ` Yong Zhang
@ 2010-04-28 8:23 ` Changli Gao
2010-04-28 9:25 ` Johannes Weiner
2010-04-28 9:29 ` David Howells
1 sibling, 1 reply; 24+ messages in thread
From: Changli Gao @ 2010-04-28 8:23 UTC (permalink / raw)
To: Yong Zhang
Cc: Xiaotian Feng, Ingo Molnar, Alexander Viro, Andrew Morton,
Eric W. Biederman, Davide Libenzi, Roland Dreier, Stefan Richter,
Peter Zijlstra, David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, David Howells, Takashi Iwai,
linux-fsdevel, linux-kernel
On Wed, Apr 28, 2010 at 4:15 PM, Yong Zhang <yong.zhang@windriver.com> wrote:
>
> What do you mean "we don't need these processes"?
If the work is less than the workers, we don't need the workers at the
tail of the exclusive list.
>
>> without them, why we wake them up?
>
> So some processes (at the tail of the exclusive list) will be treated abnormally
> and will sleep for a long time; is this reasonable?
>
If there isn't enough work to be done, we'd better not disrupt them
and leave them sleeping forever to keep the scheduler happier. Do we
have reason to keep fair to all the workers? Does it have benefit?
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 8:23 ` Changli Gao
@ 2010-04-28 9:25 ` Johannes Weiner
0 siblings, 0 replies; 24+ messages in thread
From: Johannes Weiner @ 2010-04-28 9:25 UTC (permalink / raw)
To: Changli Gao
Cc: Yong Zhang, Xiaotian Feng, Ingo Molnar, Alexander Viro,
Andrew Morton, Eric W. Biederman, Davide Libenzi, Roland Dreier,
Stefan Richter, Peter Zijlstra, David S. Miller, Eric Dumazet,
Christoph Lameter, Andreas Herrmann, Thomas Gleixner,
David Howells, Takashi Iwai, linux-fsdevel, linux-kernel
On Wed, Apr 28, 2010 at 04:23:52PM +0800, Changli Gao wrote:
> On Wed, Apr 28, 2010 at 4:15 PM, Yong Zhang <yong.zhang@windriver.com> wrote:
> >
> > What do you mean "we don't need these processes"?
>
> If the work is less than the workers, we don't need the workers at the
> tail of the exclusive list.
Have you checked how exclusive waitqueues are even used?
> > So some processs(at the tail of exclusive list)will be treated abnormally
> > and it will sleep for a long time, is this reasonable?
> >
>
> If there isn't enough work to be done, we'd better not disrupt them
> and leave them sleeping forever to keep the scheduler happier. Do we
> have reason to keep fair to all the workers? Does it have benefit?
How about starving lock contenders? See wait_on_bit_lock() and grep
for the users e.g.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 8:15 ` Yong Zhang
2010-04-28 8:23 ` Changli Gao
@ 2010-04-28 9:29 ` David Howells
2010-04-28 11:17 ` Changli Gao
2010-04-28 13:21 ` David Howells
1 sibling, 2 replies; 24+ messages in thread
From: David Howells @ 2010-04-28 9:29 UTC (permalink / raw)
To: Changli Gao
Cc: dhowells, Yong Zhang, Xiaotian Feng, Ingo Molnar, Alexander Viro,
Andrew Morton, Eric W. Biederman, Davide Libenzi, Roland Dreier,
Stefan Richter, Peter Zijlstra, David S. Miller, Eric Dumazet,
Christoph Lameter, Andreas Herrmann, Thomas Gleixner,
Takashi Iwai, linux-fsdevel, linux-kernel
Changli Gao <xiaosuo@gmail.com> wrote:
> If there isn't enough work to be done, we'd better not disrupt them
> and leave them sleeping forever to keep the scheduler happier. Do we
> have reason to keep fair to all the workers? Does it have benefit?
You've made one important assumption: the processes on the wait queue are
sleeping waiting to service things... but what if the wait queue governs
access to a resource, and all the processes on that wait queue need access to
that resource to do things? Some of the processes waiting for it may never
get a go, and so necessary work may be left undone.
So NACK.
David
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 5:03 [RFC] sched: implement the exclusive wait queue as a LIFO queue Changli Gao
2010-04-28 6:22 ` Changli Gao
2010-04-28 7:47 ` Xiaotian Feng
@ 2010-04-28 9:32 ` David Howells
2010-04-28 13:56 ` Changli Gao
2010-04-28 14:06 ` David Howells
2010-04-28 9:34 ` David Howells
3 siblings, 2 replies; 24+ messages in thread
From: David Howells @ 2010-04-28 9:32 UTC (permalink / raw)
To: Changli Gao
Cc: dhowells, Ingo Molnar, Alexander Viro, Andrew Morton,
Eric W. Biederman, Davide Libenzi, Roland Dreier, Stefan Richter,
Peter Zijlstra, David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, Takashi Iwai, linux-fsdevel,
linux-kernel
Changli Gao <xiaosuo@gmail.com> wrote:
> @@ -50,6 +48,7 @@ struct wait_bit_queue {
> struct __wait_queue_head {
> spinlock_t lock;
> struct list_head task_list;
> + struct list_head task_list_ex;
It would be preferable it if you could avoid making struct __wait_queue_head
bigger. That will increase the size of a lot of things.
David
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 5:03 [RFC] sched: implement the exclusive wait queue as a LIFO queue Changli Gao
` (2 preceding siblings ...)
2010-04-28 9:32 ` David Howells
@ 2010-04-28 9:34 ` David Howells
2010-04-28 13:47 ` Changli Gao
3 siblings, 1 reply; 24+ messages in thread
From: David Howells @ 2010-04-28 9:34 UTC (permalink / raw)
To: Changli Gao
Cc: dhowells, Ingo Molnar, Alexander Viro, Andrew Morton,
Eric W. Biederman, Davide Libenzi, Roland Dreier, Stefan Richter,
Peter Zijlstra, David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, Takashi Iwai, linux-fsdevel,
linux-kernel
Can you split the wait queue code and differentiate exclusive wait queues with
LIFO functionality from wait queues with FIFO functionality. I can see why
your suggestion is desirable.
David
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 9:29 ` David Howells
@ 2010-04-28 11:17 ` Changli Gao
2010-04-28 13:21 ` Jamie Lokier
2010-04-28 18:57 ` Davide Libenzi
2010-04-28 13:21 ` David Howells
1 sibling, 2 replies; 24+ messages in thread
From: Changli Gao @ 2010-04-28 11:17 UTC (permalink / raw)
To: David Howells
Cc: Yong Zhang, Xiaotian Feng, Ingo Molnar, Alexander Viro,
Andrew Morton, Eric W. Biederman, Davide Libenzi, Roland Dreier,
Stefan Richter, Peter Zijlstra, David S. Miller, Eric Dumazet,
Christoph Lameter, Andreas Herrmann, Thomas Gleixner,
Takashi Iwai, linux-fsdevel, linux-kernel
On Wed, Apr 28, 2010 at 5:29 PM, David Howells <dhowells@redhat.com> wrote:
> Changli Gao <xiaosuo@gmail.com> wrote:
>
>> If there isn't enough work to be done, we'd better not disrupt them
>> and leave them sleeping forever to keep the scheduler happier. Do we
>> have reason to keep fair to all the workers? Does it have benefit?
>
> You've made one important assumption: the processes on the wait queue are
> sleeping waiting to service things... but what if the wait queue governs
> access to a resource, and all the processes on that wait queue need access to
> that resource to do things? Some of the processes waiting for it may never
> get a go, and so necessary work may be left undone.
>
You are right. I made the wrong assumption. But we indeed need some
primitive to add wait_queue at the head of the wait_queue_head, and I
know epoll needs it, at least.
fs/eventpoll.c: 1443.
wait.flags |= WQ_FLAG_EXCLUSIVE;
__add_wait_queue(&ep->wq, &wait);
--
Regards,
Changli Gao(xiaosuo@gmail.com)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 9:29 ` David Howells
2010-04-28 11:17 ` Changli Gao
@ 2010-04-28 13:21 ` David Howells
1 sibling, 0 replies; 24+ messages in thread
From: David Howells @ 2010-04-28 13:21 UTC (permalink / raw)
To: Changli Gao
Cc: dhowells, Yong Zhang, Xiaotian Feng, Ingo Molnar, Alexander Viro,
Andrew Morton, Eric W. Biederman, Davide Libenzi, Roland Dreier,
Stefan Richter, Peter Zijlstra, David S. Miller, Eric Dumazet,
Christoph Lameter, Andreas Herrmann, Thomas Gleixner,
Takashi Iwai, linux-fsdevel, linux-kernel
Changli Gao <xiaosuo@gmail.com> wrote:
> You are right. I made the wrong assumption. But we indeed need some
> primitive to add wait_queue at the head of the wait_queue_head, and I
> know epoll needs it, at least.
>
> fs/eventpoll.c: 1443.
> wait.flags |= WQ_FLAG_EXCLUSIVE;
> __add_wait_queue(&ep->wq, &wait);
You can add at either end of the wait queue with __add_wait_queue() and
__add_wait_queue_tail().
David
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 11:17 ` Changli Gao
@ 2010-04-28 13:21 ` Jamie Lokier
2010-04-28 13:42 ` Changli Gao
2010-04-28 18:57 ` Davide Libenzi
1 sibling, 1 reply; 24+ messages in thread
From: Jamie Lokier @ 2010-04-28 13:21 UTC (permalink / raw)
To: Changli Gao
Cc: David Howells, Yong Zhang, Xiaotian Feng, Ingo Molnar,
Alexander Viro, Andrew Morton, Eric W. Biederman, Davide Libenzi,
Roland Dreier, Stefan Richter, Peter Zijlstra, David S. Miller,
Eric Dumazet, Christoph Lameter, Andreas Herrmann,
Thomas Gleixner, Takashi Iwai, linux-fsdevel, linux-kernel
Changli Gao wrote:
> On Wed, Apr 28, 2010 at 5:29 PM, David Howells <dhowells@redhat.com> wrote:
> > Changli Gao <xiaosuo@gmail.com> wrote:
> >
> >> If there isn't enough work to be done, we'd better not disrupt them
> >> and leave them sleeping forever to keep the scheduler happier. Do we
> >> have reason to keep fair to all the workers? Does it have benefit?
> >
> > You've made one important assumption: the processes on the wait queue are
> > sleeping waiting to service things... but what if the wait queue governs
> > access to a resource, and all the processes on that wait queue need access to
> > that resource to do things? Some of the processes waiting for it may never
> > get a go, and so necessary work may be left undone.
> >
>
> You are right. I made the wrong assumption. But we indeed need some
> primitive to add wait_queue at the head of the wait_queue_head, and I
> know epoll needs it, at least.
>
> fs/eventpoll.c: 1443.
> wait.flags |= WQ_FLAG_EXCLUSIVE;
> __add_wait_queue(&ep->wq, &wait);
The same thing about assumptions applies here. The userspace process
may be waiting for an epoll condition to get access to a resource,
rather than being a worker thread interchangeable with others.
For example, userspace might be using a pipe as a signal-safe lock, or
signal-safe multi-token semaphore, and epoll to wait for that pipe.
WQ_FLAG_EXCLUSIVE means there is no point waking all tasks, to avoid a
pointless thundering herd. It doesn't mean unfairness is ok.
The LIFO idea _might_ make sense for interchangeable worker-thread
situations - including userspace. It would make sense for pipe
waiters, socket waiters (especially accept), etc.
Do you have any measurements which showing the LIFO mode performing
better than FIFO, and by how much?
-- Jamie
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 13:21 ` Jamie Lokier
@ 2010-04-28 13:42 ` Changli Gao
2010-04-28 15:25 ` Jamie Lokier
0 siblings, 1 reply; 24+ messages in thread
From: Changli Gao @ 2010-04-28 13:42 UTC (permalink / raw)
To: Jamie Lokier
Cc: David Howells, Yong Zhang, Xiaotian Feng, Ingo Molnar,
Alexander Viro, Andrew Morton, Eric W. Biederman, Davide Libenzi,
Roland Dreier, Stefan Richter, Peter Zijlstra, David S. Miller,
Eric Dumazet, Christoph Lameter, Andreas Herrmann,
Thomas Gleixner, Takashi Iwai, linux-fsdevel, linux-kernel
On Wed, Apr 28, 2010 at 9:21 PM, Jamie Lokier <jamie@shareable.org> wrote:
> Changli Gao wrote:
>>
>> fs/eventpoll.c: 1443.
>> wait.flags |= WQ_FLAG_EXCLUSIVE;
>> __add_wait_queue(&ep->wq, &wait);
>
> The same thing about assumptions applies here. The userspace process
> may be waiting for an epoll condition to get access to a resource,
> rather than being a worker thread interchangeable with others.
Oh, the lines above are the current ones. So the assumption applies
and works here.
>
> For example, userspace might be using a pipe as a signal-safe lock, or
> signal-safe multi-token semaphore, and epoll to wait for that pipe.
>
> WQ_FLAG_EXCLUSIVE means there is no point waking all tasks, to avoid a
> pointless thundering herd. It doesn't mean unfairness is ok.
The users should not make any assumption about the waking up sequence,
neither LIFO nor FIFO.
>
> The LIFO idea _might_ make sense for interchangeable worker-thread
> situations - including userspace. It would make sense for pipe
> waiters, socket waiters (especially accept), etc.
Yea, and my following patches are for socket waiters.
>
> Do you have any measurements which showing the LIFO mode performing
> better than FIFO, and by how much?
>
I didn't do any test yet. But some work done by LSE project years ago
showed that it is better.
http://lse.sourceforge.net/io/aionotes.txt
" Also in view of
better cache utilization the wake queue mechanism is LIFO by default.
(A new exclusive LIFO wakeup option has been introduced for this purpose)"
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 9:34 ` David Howells
@ 2010-04-28 13:47 ` Changli Gao
0 siblings, 0 replies; 24+ messages in thread
From: Changli Gao @ 2010-04-28 13:47 UTC (permalink / raw)
To: David Howells
Cc: Ingo Molnar, Alexander Viro, Andrew Morton, Eric W. Biederman,
Davide Libenzi, Roland Dreier, Stefan Richter, Peter Zijlstra,
David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, Takashi Iwai, linux-fsdevel,
linux-kernel
On Wed, Apr 28, 2010 at 5:34 PM, David Howells <dhowells@redhat.com> wrote:
>
> Can you split the wait queue code and differentiate exclusive wait queues with
> LIFO functionality from wait queues with FIFO functionality. I can see why
> your suggestion is desirable.
>
OK. I will use two lists: one for non-exclusive wait queues, and the
other for exclusive wait queues, and I'll add new interfaces for LIFO
functionality instead of changing the current interfaces.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 9:32 ` David Howells
@ 2010-04-28 13:56 ` Changli Gao
2010-04-28 14:06 ` David Howells
1 sibling, 0 replies; 24+ messages in thread
From: Changli Gao @ 2010-04-28 13:56 UTC (permalink / raw)
To: David Howells
Cc: Ingo Molnar, Alexander Viro, Andrew Morton, Eric W. Biederman,
Davide Libenzi, Roland Dreier, Stefan Richter, Peter Zijlstra,
David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, Takashi Iwai, linux-fsdevel,
linux-kernel
On Wed, Apr 28, 2010 at 5:32 PM, David Howells <dhowells@redhat.com> wrote:
> Changli Gao <xiaosuo@gmail.com> wrote:
>
>> @@ -50,6 +48,7 @@ struct wait_bit_queue {
>> struct __wait_queue_head {
>> spinlock_t lock;
>> struct list_head task_list;
>> + struct list_head task_list_ex;
>
> It would be preferable it if you could avoid making struct __wait_queue_head
> bigger. That will increase the size of a lot of things.
>
I don't know how to do that, as maybe there are non-exclusive and
exclusive wait queues in the same wait queue head. If we want to
enqueue exclusive wait queues at the head of exclusive queues, we have
to know where the head is, otherwise, we have to loop to find the head
when enqueuing.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 9:32 ` David Howells
2010-04-28 13:56 ` Changli Gao
@ 2010-04-28 14:06 ` David Howells
2010-04-28 14:53 ` Changli Gao
2010-04-28 15:00 ` David Howells
1 sibling, 2 replies; 24+ messages in thread
From: David Howells @ 2010-04-28 14:06 UTC (permalink / raw)
To: Changli Gao
Cc: dhowells, Ingo Molnar, Alexander Viro, Andrew Morton,
Eric W. Biederman, Davide Libenzi, Roland Dreier, Stefan Richter,
Peter Zijlstra, David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, Takashi Iwai, linux-fsdevel,
linux-kernel
Changli Gao <xiaosuo@gmail.com> wrote:
> I don't know how to do that, as maybe there are non-exclusive and
> exclusive wait queues in the same wait queue head. If we want to
> enqueue exclusive wait queues at the head of exclusive queues, we have
> to know where the head is, otherwise, we have to loop to find the head
> when enqueuing.
I suspect you really want to have the semantics defined per-queue. _Either_ a
queue is FIFO (such as processes waiting for a resource so they can do
something with it) _or_ it is LIFO (such as a pool of processes waiting to be
given work).
How often do the two actually mix? And if they do, is that really an error?
David
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 14:06 ` David Howells
@ 2010-04-28 14:53 ` Changli Gao
2010-04-28 15:00 ` David Howells
1 sibling, 0 replies; 24+ messages in thread
From: Changli Gao @ 2010-04-28 14:53 UTC (permalink / raw)
To: David Howells
Cc: Ingo Molnar, Alexander Viro, Andrew Morton, Eric W. Biederman,
Davide Libenzi, Roland Dreier, Stefan Richter, Peter Zijlstra,
David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, Takashi Iwai, linux-fsdevel,
linux-kernel
On Wed, Apr 28, 2010 at 10:06 PM, David Howells <dhowells@redhat.com> wrote:
>
> I suspect you really want to have the semantics defined per-queue. _Either_ a
> queue is FIFO (such as processes waiting for a resource so they can do
> something with it) _or_ it is LIFO (such as a pool of processes waiting to be
> given work).
>
> How often do the two actually mix? And if they do, is that really an error?
>
The sock.sk_sleep is used by exclusive and non-exclusive wait queues.
exclusive and non-exclusive is identified by wait queues, not wait
queue heads. Maybe there is a historical reason. It is much like a
hack.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 14:06 ` David Howells
2010-04-28 14:53 ` Changli Gao
@ 2010-04-28 15:00 ` David Howells
2010-04-28 15:33 ` Changli Gao
1 sibling, 1 reply; 24+ messages in thread
From: David Howells @ 2010-04-28 15:00 UTC (permalink / raw)
To: Changli Gao
Cc: dhowells, Ingo Molnar, Alexander Viro, Andrew Morton,
Eric W. Biederman, Davide Libenzi, Roland Dreier, Stefan Richter,
Peter Zijlstra, David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, Takashi Iwai, linux-fsdevel,
linux-kernel
Changli Gao <xiaosuo@gmail.com> wrote:
> > How often do the two actually mix? And if they do, is that really an
> > error?
>
> The sock.sk_sleep is used by exclusive and non-exclusive wait queues.
> exclusive and non-exclusive is identified by wait queues, not wait
> queue heads. Maybe there is a historical reason. It is much like a
> hack.
But is it actually used in both modes at the same time?
David
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 13:42 ` Changli Gao
@ 2010-04-28 15:25 ` Jamie Lokier
2010-04-28 15:49 ` Changli Gao
0 siblings, 1 reply; 24+ messages in thread
From: Jamie Lokier @ 2010-04-28 15:25 UTC (permalink / raw)
To: Changli Gao
Cc: David Howells, Yong Zhang, Xiaotian Feng, Ingo Molnar,
Alexander Viro, Andrew Morton, Eric W. Biederman, Davide Libenzi,
Roland Dreier, Stefan Richter, Peter Zijlstra, David S. Miller,
Eric Dumazet, Christoph Lameter, Andreas Herrmann,
Thomas Gleixner, Takashi Iwai, linux-fsdevel, linux-kernel
Changli Gao wrote:
> On Wed, Apr 28, 2010 at 9:21 PM, Jamie Lokier <jamie@shareable.org> wrote:
> > Changli Gao wrote:
> >>
> >> fs/eventpoll.c: 1443.
> >> wait.flags |= WQ_FLAG_EXCLUSIVE;
> >> __add_wait_queue(&ep->wq, &wait);
> >
> > The same thing about assumptions applies here. The userspace process
> > may be waiting for an epoll condition to get access to a resource,
> > rather than being a worker thread interchangeable with others.
>
> Oh, the lines above are the current ones. So the assumptions applies
> and works here.
No, because WQ_FLAG_EXCLUSIVE doesn't have your LIFO semantic at the moment.
Your patch changes the behaviour of epoll, though I don't know if it
matters. Perhaps all programs which have multiple tasks waiting on
the same epoll fd are "interchangeable worker thread" types anyway :-)
> > For example, userspace might be using a pipe as a signal-safe lock, or
> > signal-safe multi-token semaphore, and epoll to wait for that pipe.
> >
> > WQ_FLAG_EXCLUSIVE means there is no point waking all tasks, to avoid a
> > pointless thundering herd. It doesn't mean unfairness is ok.
>
> The users should not make any assumption about the waking up sequence,
> neither LIFO nor FIFO.
Correct, but they should be able to assume non-starvation (eventual
progress) for all waiters.
It's one of those subtle things, possibly a unixy thing: Non-RT tasks
should always make progress when the competition is just other non-RT
tasks, even if the progress is slow.
Starvation can spread out beyond the starved process, to cause
priority inversions in other tasks that are waiting on a resource
locked by the starved process. Among other things, that can cause
higher priority tasks, and RT priority tasks, to block permanently.
Very unpleasant.
> > The LIFO idea _might_ make sense for interchangeable worker-thread
> > situations - including userspace. It would make sense for pipe
> > waiters, socket waiters (especially accept), etc.
>
> Yea, and my following patches are for socket waiters.
Unix socketpairs are occasionally used in the above ways too.
I'm not against your patch, but I worry that starvation is a new
semantic, and it may have a significant effect on something - either
in the kernel, or in userspace which is harder to check.
> > Do you have any measurements which showing the LIFO mode performing
> > better than FIFO, and by how much?
>
> I didn't do any test yet. But some work done by LSE project years ago
> showed that it is better.
>
> http://lse.sourceforge.net/io/aionotes.txt
>
> " Also in view of
> better cache utilization the wake queue mechanism is LIFO by default.
> (A new exclusive LIFO wakeup option has been introduced for this purpose)"
I suspect it's possible to combine LIFO-ish and FIFO-ish queuing to
prevent starvation while getting some of the locality benefit.
Something like add-LIFO and increment a small counter in the next wait
entry, but never add in front of an entry whose counter has reached
MAX_LIFO_WAITERS? :-)
-- Jamie
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 15:00 ` David Howells
@ 2010-04-28 15:33 ` Changli Gao
0 siblings, 0 replies; 24+ messages in thread
From: Changli Gao @ 2010-04-28 15:33 UTC (permalink / raw)
To: David Howells
Cc: Ingo Molnar, Alexander Viro, Andrew Morton, Eric W. Biederman,
Davide Libenzi, Roland Dreier, Stefan Richter, Peter Zijlstra,
David S. Miller, Eric Dumazet, Christoph Lameter,
Andreas Herrmann, Thomas Gleixner, Takashi Iwai, linux-fsdevel,
linux-kernel
On Wed, Apr 28, 2010 at 11:00 PM, David Howells <dhowells@redhat.com> wrote:
>
> But is it actually used in both modes at the same time?
>
It should be seldom in good applications. So I only need to add a
function, like this:
add_wait_queue_head_exclusive(wq, wait)
first_exclusive = find_the_first_exclusive(wq); // it is fast, as
there should no or few non-exclusive wait queues
list_add(wait, first_exclusive);
--
Regards,
Changli Gao(xiaosuo@gmail.com)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 15:25 ` Jamie Lokier
@ 2010-04-28 15:49 ` Changli Gao
0 siblings, 0 replies; 24+ messages in thread
From: Changli Gao @ 2010-04-28 15:49 UTC (permalink / raw)
To: Jamie Lokier
Cc: David Howells, Yong Zhang, Xiaotian Feng, Ingo Molnar,
Alexander Viro, Andrew Morton, Eric W. Biederman, Davide Libenzi,
Roland Dreier, Stefan Richter, Peter Zijlstra, David S. Miller,
Eric Dumazet, Christoph Lameter, Andreas Herrmann,
Thomas Gleixner, Takashi Iwai, linux-fsdevel, linux-kernel
On Wed, Apr 28, 2010 at 11:25 PM, Jamie Lokier <jamie@shareable.org> wrote:
> Changli Gao wrote:
>> On Wed, Apr 28, 2010 at 9:21 PM, Jamie Lokier <jamie@shareable.org> wrote:
>> > Changli Gao wrote:
>> >>
>> >> fs/eventpoll.c: 1443.
>> >> wait.flags |= WQ_FLAG_EXCLUSIVE;
>> >> __add_wait_queue(&ep->wq, &wait);
>> >
>> > The same thing about assumptions applies here. The userspace process
>> > may be waiting for an epoll condition to get access to a resource,
>> > rather than being a worker thread interchangeable with others.
>>
>> Oh, the lines above are the current ones. So the assumption applies
>> and works here.
>
> No, because WQ_FLAG_EXCLUSIVE doesn't have your LIFO semantic at the moment.
>
> Your patch changes the behaviour of epoll, though I don't know if it
> matters. Perhaps all programs which have multiple tasks waiting on
> the same epoll fd are "interchangeable worker thread" types anyway :-)
>
No. You are wrong. I meant epoll implemented LIFO on its own. You
should check the code. :)
>> > For example, userspace might be using a pipe as a signal-safe lock, or
>> > signal-safe multi-token semaphore, and epoll to wait for that pipe.
>> >
>> > WQ_FLAG_EXCLUSIVE means there is no point waking all tasks, to avoid a
>> > pointless thundering herd. It doesn't mean unfairness is ok.
>>
>> The users should not make any assumption about the waking up sequence,
>> neither LIFO nor FIFO.
>
> Correct, but they should be able to assume non-starvation (eventual
> progress) for all waiters.
>
> It's one of those subtle things, possibly a unixy thing: Non-RT tasks
> should always make progress when the competition is just other non-RT
> tasks, even if the progress is slow.
>
> Starvation can spread out beyond the starved process, to cause
> priority inversions in other tasks that are waiting on a resource
> locked by the starved process. Among other things, that can cause
> higher priority tasks, and RT priority tasks, to block permanently.
> Very unpleasant.
>
>> > The LIFO idea _might_ make sense for interchangeable worker-thread
>> > situations - including userspace. It would make sense for pipe
>> > waiters, socket waiters (especially accept), etc.
>>
>> Yea, and my following patches are for socket waiters.
>
> Occasionally unix socketpairs are occasionally used in the above ways too.
>
> I'm not against your patch, but I worry that starvation is a new
> semantic, and it may have a significant effect on something - either
> in the kernel, or in userspace which is harder to check.
Thanks for your reminding.
>
> I suspect it's possible to combine LIFO-ish and FIFO-ish queuing to
> prevent starvation while getting some of the locality benefit.
> Something like add-LIFO and increment a small counter in the next wait
> entry, but never add in front of an entry whose counter has reached
> MAX_LIFO_WAITERS? :-)
>
It is a little complex, and I'll keep it simple and improve it when necessary.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC] sched: implement the exclusive wait queue as a LIFO queue
2010-04-28 11:17 ` Changli Gao
2010-04-28 13:21 ` Jamie Lokier
@ 2010-04-28 18:57 ` Davide Libenzi
1 sibling, 0 replies; 24+ messages in thread
From: Davide Libenzi @ 2010-04-28 18:57 UTC (permalink / raw)
To: Changli Gao
Cc: David Howells, Yong Zhang, Xiaotian Feng, Ingo Molnar,
Alexander Viro, Andrew Morton, Eric W. Biederman, Roland Dreier,
Stefan Richter, Peter Zijlstra, David S. Miller, Eric Dumazet,
Christoph Lameter, Andreas Herrmann, Thomas Gleixner,
Takashi Iwai, linux-fsdevel, Linux Kernel Mailing List
[-- Attachment #1: Type: TEXT/PLAIN, Size: 1243 bytes --]
On Wed, 28 Apr 2010, Changli Gao wrote:
> On Wed, Apr 28, 2010 at 5:29 PM, David Howells <dhowells@redhat.com> wrote:
> > Changli Gao <xiaosuo@gmail.com> wrote:
> >
> >> If there isn't enough work to be done, we'd better not disrupt them
> >> and leave them sleeping forever to keep the scheduler happier. Do we
> >> have reason to keep fair to all the workers? Does it have benefit?
> >
> > You've made one important assumption: the processes on the wait queue are
> > sleeping waiting to service things... but what if the wait queue governs
> > access to a resource, and all the processes on that wait queue need access to
> > that resource to do things? Some of the processes waiting for it may never
> > get a go, and so necessary work may be left undone.
> >
>
> You are right. I made the wrong assumption. But we indeed need some
> primitive to add wait_queue at the head of the wait_queue_head, and I
> know epoll needs it, at least.
>
> fs/eventpoll.c: 1443.
> wait.flags |= WQ_FLAG_EXCLUSIVE;
> __add_wait_queue(&ep->wq, &wait);
I'm not sure one user deserves a new function, but like it has been
noticed, the patch for that should eventually be totally isolated from
other bits.
- Davide
^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2010-04-28 18:57 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-28 5:03 [RFC] sched: implement the exclusive wait queue as a LIFO queue Changli Gao
2010-04-28 6:22 ` Changli Gao
2010-04-28 8:05 ` Changli Gao
2010-04-28 7:47 ` Xiaotian Feng
2010-04-28 7:52 ` Changli Gao
2010-04-28 8:15 ` Yong Zhang
2010-04-28 8:23 ` Changli Gao
2010-04-28 9:25 ` Johannes Weiner
2010-04-28 9:29 ` David Howells
2010-04-28 11:17 ` Changli Gao
2010-04-28 13:21 ` Jamie Lokier
2010-04-28 13:42 ` Changli Gao
2010-04-28 15:25 ` Jamie Lokier
2010-04-28 15:49 ` Changli Gao
2010-04-28 18:57 ` Davide Libenzi
2010-04-28 13:21 ` David Howells
2010-04-28 9:32 ` David Howells
2010-04-28 13:56 ` Changli Gao
2010-04-28 14:06 ` David Howells
2010-04-28 14:53 ` Changli Gao
2010-04-28 15:00 ` David Howells
2010-04-28 15:33 ` Changli Gao
2010-04-28 9:34 ` David Howells
2010-04-28 13:47 ` Changli Gao
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).