* [PATCH 1/5] futex: checkpatch cleanup
@ 2008-06-11 20:49 Daniel Walker
2008-06-11 20:49 ` [PATCH 2/5] futex: update prio on requeue Daniel Walker
` (3 more replies)
0 siblings, 4 replies; 23+ messages in thread
From: Daniel Walker @ 2008-06-11 20:49 UTC (permalink / raw)
To: linux-kernel; +Cc: Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
[-- Attachment #1: futex-checkpatch-cleanup.patch --]
[-- Type: text/plain, Size: 4766 bytes --]
Signed-off-by: Daniel Walker <dwalker@mvista.com>
---
kernel/futex.c | 52 +++++++++++++++++++++++++++-------------------------
1 file changed, 27 insertions(+), 25 deletions(-)
Index: linux-2.6.25/kernel/futex.c
===================================================================
--- linux-2.6.25.orig/kernel/futex.c
+++ linux-2.6.25/kernel/futex.c
@@ -145,7 +145,7 @@ static inline void futex_unlock_mm(struc
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
- u32 hash = jhash2((u32*)&key->both.word,
+ u32 hash = jhash2((u32 *)&key->both.word,
(sizeof(key->both.word)+sizeof(key->both.ptr))/4,
key->both.offset);
return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
@@ -234,7 +234,8 @@ static int get_futex_key(u32 __user *uad
* mappings of _writable_ handles.
*/
if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
- key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
+ /* reference taken on mm */
+ key->both.offset |= FUT_OFF_MMSHARED;
key->private.mm = mm;
key->private.address = address;
return 0;
@@ -277,12 +278,12 @@ static void get_futex_key_refs(union fut
if (key->both.ptr == NULL)
return;
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
- case FUT_OFF_INODE:
- atomic_inc(&key->shared.inode->i_count);
- break;
- case FUT_OFF_MMSHARED:
- atomic_inc(&key->private.mm->mm_count);
- break;
+ case FUT_OFF_INODE:
+ atomic_inc(&key->shared.inode->i_count);
+ break;
+ case FUT_OFF_MMSHARED:
+ atomic_inc(&key->private.mm->mm_count);
+ break;
}
}
@@ -295,12 +296,12 @@ static void drop_futex_key_refs(union fu
if (!key->both.ptr)
return;
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
- case FUT_OFF_INODE:
- iput(key->shared.inode);
- break;
- case FUT_OFF_MMSHARED:
- mmdrop(key->private.mm);
- break;
+ case FUT_OFF_INODE:
+ iput(key->shared.inode);
+ break;
+ case FUT_OFF_MMSHARED:
+ mmdrop(key->private.mm);
+ break;
}
}
@@ -333,7 +334,7 @@ static int get_futex_value_locked(u32 *d
static int futex_handle_fault(unsigned long address,
struct rw_semaphore *fshared, int attempt)
{
- struct vm_area_struct * vma;
+ struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
int ret = -EFAULT;
@@ -391,7 +392,7 @@ static int refill_pi_state_cache(void)
return 0;
}
-static struct futex_pi_state * alloc_pi_state(void)
+static struct futex_pi_state *alloc_pi_state(void)
{
struct futex_pi_state *pi_state = current->pi_state_cache;
@@ -436,7 +437,7 @@ static void free_pi_state(struct futex_p
* Look up the task based on what TID userspace gave us.
* We dont trust it.
*/
-static struct task_struct * futex_find_get_task(pid_t pid)
+static struct task_struct *futex_find_get_task(pid_t pid)
{
struct task_struct *p;
@@ -742,7 +743,7 @@ static int futex_wake(u32 __user *uaddr,
head = &hb->chain;
plist_for_each_entry_safe(this, next, head, list) {
- if (match_futex (&this->key, &key)) {
+ if (match_futex(&this->key, &key)) {
if (this->pi_state) {
ret = -EINVAL;
break;
@@ -848,7 +849,7 @@ retry:
head = &hb1->chain;
plist_for_each_entry_safe(this, next, head, list) {
- if (match_futex (&this->key, &key1)) {
+ if (match_futex(&this->key, &key1)) {
wake_futex(this);
if (++ret >= nr_wake)
break;
@@ -860,7 +861,7 @@ retry:
op_ret = 0;
plist_for_each_entry_safe(this, next, head, list) {
- if (match_futex (&this->key, &key2)) {
+ if (match_futex(&this->key, &key2)) {
wake_futex(this);
if (++op_ret >= nr_wake2)
break;
@@ -938,7 +939,7 @@ static int futex_requeue(u32 __user *uad
head1 = &hb1->chain;
plist_for_each_entry_safe(this, next, head1, list) {
- if (!match_futex (&this->key, &key1))
+ if (!match_futex(&this->key, &key1))
continue;
if (++ret <= nr_wake) {
wake_futex(this);
@@ -1648,7 +1649,8 @@ retry_unlocked:
* anyone else up:
*/
if (!(uval & FUTEX_OWNER_DIED))
- uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
+ uval = cmpxchg_futex_value_locked(uaddr,
+ task_pid_vnr(current), 0);
if (unlikely(uval == -EFAULT))
@@ -1667,7 +1669,7 @@ retry_unlocked:
head = &hb->chain;
plist_for_each_entry_safe(this, next, head, list) {
- if (!match_futex (&this->key, &key))
+ if (!match_futex(&this->key, &key))
continue;
ret = wake_futex_pi(uaddr, uval, this);
/*
@@ -1913,8 +1915,8 @@ void exit_robust_list(struct task_struct
* don't process it twice:
*/
if (entry != pending)
- if (handle_futex_death((void __user *)entry + futex_offset,
- curr, pi))
+ if (handle_futex_death((void __user *)entry +
+ futex_offset, curr, pi))
return;
if (rc)
return;
--
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 2/5] futex: update prio on requeue
2008-06-11 20:49 [PATCH 1/5] futex: checkpatch cleanup Daniel Walker
@ 2008-06-11 20:49 ` Daniel Walker
2008-06-12 5:22 ` Peter Zijlstra
2008-06-11 20:49 ` [PATCH 3/5] mutex debug: add generic blocked_on usage Daniel Walker
` (2 subsequent siblings)
3 siblings, 1 reply; 23+ messages in thread
From: Daniel Walker @ 2008-06-11 20:49 UTC (permalink / raw)
To: linux-kernel; +Cc: Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
[-- Attachment #1: futex-update-prio-on-requeue.patch --]
[-- Type: text/plain, Size: 848 bytes --]
Since the priority may have changed, the requeue is a good place to update
the priority since we're already deleting and adding to a new list.
Signed-off-by: Daniel Walker <dwalker@mvista.com>
---
kernel/futex.c | 3 +++
1 file changed, 3 insertions(+)
Index: linux-2.6.25/kernel/futex.c
===================================================================
--- linux-2.6.25.orig/kernel/futex.c
+++ linux-2.6.25/kernel/futex.c
@@ -980,7 +980,10 @@ static int futex_requeue(u32 __user *uad
* requeue.
*/
if (likely(head1 != &hb2->chain)) {
+ struct task_struct *task = this->task;
+ int prio = min(task->normal_prio, MAX_RT_PRIO);
plist_del(&this->list, &hb1->chain);
+ plist_node_init(&this->list, prio);
plist_add(&this->list, &hb2->chain);
this->lock_ptr = &hb2->lock;
#ifdef CONFIG_DEBUG_PI_LIST
--
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 3/5] mutex debug: add generic blocked_on usage
2008-06-11 20:49 [PATCH 1/5] futex: checkpatch cleanup Daniel Walker
2008-06-11 20:49 ` [PATCH 2/5] futex: update prio on requeue Daniel Walker
@ 2008-06-11 20:49 ` Daniel Walker
2008-06-12 5:25 ` Peter Zijlstra
2008-06-11 20:49 ` [PATCH 4/5] rtmutex: " Daniel Walker
2008-06-11 20:49 ` [PATCH 5/5] futex: fix miss ordered wakeups Daniel Walker
3 siblings, 1 reply; 23+ messages in thread
From: Daniel Walker @ 2008-06-11 20:49 UTC (permalink / raw)
To: linux-kernel; +Cc: Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
[-- Attachment #1: blocked_on-mutex.patch --]
[-- Type: text/plain, Size: 6045 bytes --]
There are a couple of blocked on type structures. One for mutexes, and one
for rtmutexes, and we also need one for futexes.
Instead of just adding another one to the task struct I combined them all into
a union. Since a waiter can only be blocked on one of the types at any given
time this should be safe.
I also usurped the pi_lock as the lock which protects all the blocked_on types.
Signed-off-by: Daniel Walker <dwalker@mvista.com>
---
include/linux/sched.h | 36 +++++++++++++++++++++++++++++++-----
kernel/fork.c | 2 --
kernel/mutex-debug.c | 25 ++++++++++++++++++-------
kernel/mutex-debug.h | 4 +++-
kernel/mutex.c | 6 +++++-
5 files changed, 57 insertions(+), 16 deletions(-)
Index: linux-2.6.25/include/linux/sched.h
===================================================================
--- linux-2.6.25.orig/include/linux/sched.h
+++ linux-2.6.25/include/linux/sched.h
@@ -1024,6 +1024,17 @@ struct sched_rt_entity {
#endif
};
+enum lock_waiter_type {
+ MUTEX_WAITER = 1,
+};
+
+struct lock_waiter_state {
+ enum lock_waiter_type lock_type;
+ union {
+ struct mutex_waiter *mutex_blocked_on;
+ };
+};
+
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
@@ -1201,7 +1212,7 @@ struct task_struct {
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
spinlock_t alloc_lock;
- /* Protection of the PI data structures: */
+ /* Protects blocked_on field and PI waiters list. */
spinlock_t pi_lock;
#ifdef CONFIG_RT_MUTEXES
@@ -1211,10 +1222,12 @@ struct task_struct {
struct rt_mutex_waiter *pi_blocked_on;
#endif
-#ifdef CONFIG_DEBUG_MUTEXES
- /* mutex deadlock detection */
- struct mutex_waiter *blocked_on;
-#endif
+ /*
+ * Deadlock detection and priority inheritance handling,
+ * and any other out of line mutex operations
+ */
+ struct lock_waiter_state *blocked_on;
+
#ifdef CONFIG_TRACE_IRQFLAGS
unsigned int irq_events;
int hardirqs_enabled;
@@ -1306,6 +1319,19 @@ struct task_struct {
#endif
};
+
+/*
+ * set_blocked_on - Set the blocked on field in the task struct.
+ */
+static inline void
+set_blocked_on(struct task_struct *p, struct lock_waiter_state *blocked_on)
+{
+ spin_lock(&p->pi_lock);
+ p->blocked_on = blocked_on;
+ spin_unlock(&p->pi_lock);
+}
+
+
/*
* Priority of a process goes from 0..MAX_PRIO-1, valid RT
* priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
Index: linux-2.6.25/kernel/fork.c
===================================================================
--- linux-2.6.25.orig/kernel/fork.c
+++ linux-2.6.25/kernel/fork.c
@@ -1028,9 +1028,7 @@ static struct task_struct *copy_process(
p->lockdep_recursion = 0;
#endif
-#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
-#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
Index: linux-2.6.25/kernel/mutex-debug.c
===================================================================
--- linux-2.6.25.orig/kernel/mutex-debug.c
+++ linux-2.6.25/kernel/mutex-debug.c
@@ -52,23 +52,34 @@ void debug_mutex_free_waiter(struct mute
memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
}
-void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
- struct thread_info *ti)
+void
+debug_mutex_add_waiter(struct mutex *lock,
+ struct lock_waiter_state *lock_waiter,
+ struct thread_info *ti)
{
+ struct task_struct *task = ti->task;
+
SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
/* Mark the current thread as blocked on the lock: */
- ti->task->blocked_on = waiter;
- waiter->lock = lock;
+ lock_waiter->mutex_blocked_on->lock = lock;
+ set_blocked_on(task, lock_waiter);
}
void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti)
{
+ struct task_struct *task = ti->task;
+
+ spin_lock(&task->pi_lock);
+
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
- DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
- DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
- ti->task->blocked_on = NULL;
+ DEBUG_LOCKS_WARN_ON(waiter->task != task);
+ DEBUG_LOCKS_WARN_ON(task->blocked_on == NULL);
+ DEBUG_LOCKS_WARN_ON(task->blocked_on->mutex_blocked_on != waiter);
+
+ task->blocked_on = NULL;
+ spin_unlock(&task->pi_lock);
list_del_init(&waiter->list);
waiter->task = NULL;
Index: linux-2.6.25/kernel/mutex-debug.h
===================================================================
--- linux-2.6.25.orig/kernel/mutex-debug.h
+++ linux-2.6.25/kernel/mutex-debug.h
@@ -25,10 +25,12 @@ extern void debug_mutex_lock_common(stru
struct mutex_waiter *waiter);
extern void debug_mutex_wake_waiter(struct mutex *lock,
struct mutex_waiter *waiter);
+
extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
extern void debug_mutex_add_waiter(struct mutex *lock,
- struct mutex_waiter *waiter,
+ struct lock_waiter_state *lock_waiter,
struct thread_info *ti);
+
extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti);
extern void debug_mutex_unlock(struct mutex *lock);
Index: linux-2.6.25/kernel/mutex.c
===================================================================
--- linux-2.6.25.orig/kernel/mutex.c
+++ linux-2.6.25/kernel/mutex.c
@@ -130,12 +130,16 @@ __mutex_lock_common(struct mutex *lock,
struct mutex_waiter waiter;
unsigned int old_val;
unsigned long flags;
+#ifdef CONFIG_DEBUG_MUTEXES
+ struct lock_waiter_state lock_waiter = {
+ .lock_type = MUTEX_WAITER, { .mutex_blocked_on = &waiter} };
+#endif
spin_lock_mutex(&lock->wait_lock, flags);
debug_mutex_lock_common(lock, &waiter);
mutex_acquire(&lock->dep_map, subclass, 0, ip);
- debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
+ debug_mutex_add_waiter(lock, &lock_waiter, task_thread_info(task));
/* add waiting tasks to the end of the waitqueue (FIFO): */
list_add_tail(&waiter.list, &lock->wait_list);
--
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 4/5] rtmutex: add generic blocked_on usage
2008-06-11 20:49 [PATCH 1/5] futex: checkpatch cleanup Daniel Walker
2008-06-11 20:49 ` [PATCH 2/5] futex: update prio on requeue Daniel Walker
2008-06-11 20:49 ` [PATCH 3/5] mutex debug: add generic blocked_on usage Daniel Walker
@ 2008-06-11 20:49 ` Daniel Walker
2008-06-11 20:49 ` [PATCH 5/5] futex: fix miss ordered wakeups Daniel Walker
3 siblings, 0 replies; 23+ messages in thread
From: Daniel Walker @ 2008-06-11 20:49 UTC (permalink / raw)
To: linux-kernel; +Cc: Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
[-- Attachment #1: blocked_on-rtmutex.patch --]
[-- Type: text/plain, Size: 5712 bytes --]
Modify the rtmutex to use the generic blocked_on field.
Signed-off-by: Daniel Walker <dwalker@mvista.com>
---
include/linux/sched.h | 4 ++--
kernel/fork.c | 1 -
kernel/rtmutex.c | 35 ++++++++++++++++++++++++-----------
3 files changed, 26 insertions(+), 14 deletions(-)
Index: linux-2.6.25/include/linux/sched.h
===================================================================
--- linux-2.6.25.orig/include/linux/sched.h
+++ linux-2.6.25/include/linux/sched.h
@@ -1026,12 +1026,14 @@ struct sched_rt_entity {
enum lock_waiter_type {
MUTEX_WAITER = 1,
+ RT_MUTEX_WAITER,
};
struct lock_waiter_state {
enum lock_waiter_type lock_type;
union {
struct mutex_waiter *mutex_blocked_on;
+ struct rt_mutex_waiter *rt_blocked_on;
};
};
@@ -1218,8 +1220,6 @@ struct task_struct {
#ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task */
struct plist_head pi_waiters;
- /* Deadlock detection and priority inheritance handling */
- struct rt_mutex_waiter *pi_blocked_on;
#endif
/*
Index: linux-2.6.25/kernel/fork.c
===================================================================
--- linux-2.6.25.orig/kernel/fork.c
+++ linux-2.6.25/kernel/fork.c
@@ -850,7 +850,6 @@ static void rt_mutex_init_task(struct ta
spin_lock_init(&p->pi_lock);
#ifdef CONFIG_RT_MUTEXES
plist_head_init(&p->pi_waiters, &p->pi_lock);
- p->pi_blocked_on = NULL;
#endif
}
Index: linux-2.6.25/kernel/rtmutex.c
===================================================================
--- linux-2.6.25.orig/kernel/rtmutex.c
+++ linux-2.6.25/kernel/rtmutex.c
@@ -74,6 +74,14 @@ static void fixup_rt_mutex_waiters(struc
clear_rt_mutex_waiters(lock);
}
+static
+struct rt_mutex_waiter *rt_mutex_get_waiter(struct task_struct *task)
+{
+ if (task->blocked_on && task->blocked_on->lock_type == RT_MUTEX_WAITER)
+ return task->blocked_on->rt_blocked_on;
+ return NULL;
+}
+
/*
* We can speed up the acquire/release, if the architecture
* supports cmpxchg and if there's no debugging state to be set up
@@ -197,7 +205,7 @@ static int rt_mutex_adjust_prio_chain(st
*/
spin_lock_irqsave(&task->pi_lock, flags);
- waiter = task->pi_blocked_on;
+ waiter = rt_mutex_get_waiter(task);
/*
* Check whether the end of the boosting chain has been
* reached or the state of the chain has changed while we
@@ -411,6 +419,7 @@ static int try_to_take_rt_mutex(struct r
*/
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
+ struct lock_waiter_state *lock_waiter,
int detect_deadlock)
{
struct task_struct *owner = rt_mutex_owner(lock);
@@ -430,7 +439,7 @@ static int task_blocks_on_rt_mutex(struc
top_waiter = rt_mutex_top_waiter(lock);
plist_add(&waiter->list_entry, &lock->wait_list);
- current->pi_blocked_on = waiter;
+ current->blocked_on = lock_waiter;
spin_unlock_irqrestore(¤t->pi_lock, flags);
@@ -440,7 +449,7 @@ static int task_blocks_on_rt_mutex(struc
plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
__rt_mutex_adjust_prio(owner);
- if (owner->pi_blocked_on)
+ if (rt_mutex_get_waiter(owner))
chain_walk = 1;
spin_unlock_irqrestore(&owner->pi_lock, flags);
}
@@ -501,7 +510,7 @@ static void wakeup_next_waiter(struct rt
spin_unlock_irqrestore(¤t->pi_lock, flags);
/*
- * Clear the pi_blocked_on variable and enqueue a possible
+ * Clear the blocked_on variable and enqueue a possible
* waiter into the pi_waiters list of the pending owner. This
* prevents that in case the pending owner gets unboosted a
* waiter with higher priority than pending-owner->normal_prio
@@ -509,11 +518,12 @@ static void wakeup_next_waiter(struct rt
*/
spin_lock_irqsave(&pendowner->pi_lock, flags);
- WARN_ON(!pendowner->pi_blocked_on);
- WARN_ON(pendowner->pi_blocked_on != waiter);
- WARN_ON(pendowner->pi_blocked_on->lock != lock);
+ WARN_ON(!pendowner->blocked_on);
+ WARN_ON(pendowner->blocked_on->lock_type != RT_MUTEX_WAITER);
+ WARN_ON(pendowner->blocked_on->rt_blocked_on != waiter);
+ WARN_ON(pendowner->blocked_on->rt_blocked_on->lock != lock);
- pendowner->pi_blocked_on = NULL;
+ pendowner->blocked_on = NULL;
if (rt_mutex_has_waiters(lock)) {
struct rt_mutex_waiter *next;
@@ -542,7 +552,7 @@ static void remove_waiter(struct rt_mute
spin_lock_irqsave(¤t->pi_lock, flags);
plist_del(&waiter->list_entry, &lock->wait_list);
waiter->task = NULL;
- current->pi_blocked_on = NULL;
+ current->blocked_on = NULL;
spin_unlock_irqrestore(¤t->pi_lock, flags);
if (first && owner != current) {
@@ -559,7 +569,7 @@ static void remove_waiter(struct rt_mute
}
__rt_mutex_adjust_prio(owner);
- if (owner->pi_blocked_on)
+ if (rt_mutex_get_waiter(owner))
chain_walk = 1;
spin_unlock_irqrestore(&owner->pi_lock, flags);
@@ -592,7 +602,7 @@ void rt_mutex_adjust_pi(struct task_stru
spin_lock_irqsave(&task->pi_lock, flags);
- waiter = task->pi_blocked_on;
+ waiter = rt_mutex_get_waiter(task);
if (!waiter || waiter->list_entry.prio == task->prio) {
spin_unlock_irqrestore(&task->pi_lock, flags);
return;
@@ -614,6 +624,8 @@ rt_mutex_slowlock(struct rt_mutex *lock,
int detect_deadlock)
{
struct rt_mutex_waiter waiter;
+ struct lock_waiter_state lock_waiter = {
+ .lock_type = RT_MUTEX_WAITER, { .rt_blocked_on = &waiter} };
int ret = 0;
debug_rt_mutex_init_waiter(&waiter);
@@ -663,6 +675,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
*/
if (!waiter.task) {
ret = task_blocks_on_rt_mutex(lock, &waiter,
+ &lock_waiter,
detect_deadlock);
/*
* If we got woken up by the owner then start loop
--
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-11 20:49 [PATCH 1/5] futex: checkpatch cleanup Daniel Walker
` (2 preceding siblings ...)
2008-06-11 20:49 ` [PATCH 4/5] rtmutex: " Daniel Walker
@ 2008-06-11 20:49 ` Daniel Walker
2008-06-12 6:07 ` Peter Zijlstra
2008-06-12 8:56 ` Thomas Gleixner
3 siblings, 2 replies; 23+ messages in thread
From: Daniel Walker @ 2008-06-11 20:49 UTC (permalink / raw)
To: linux-kernel; +Cc: Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
[-- Attachment #1: blocked_on-futex.patch --]
[-- Type: text/plain, Size: 4334 bytes --]
Adds an additional function call to the sched_setscheduler to update the
waiter position of a task if it happens to be waiting on a futex. This
ensures that the kernel level waiter ordering is correctly maintained
based on the changed priority of the task.
I fixed the locking issue noticed by Thomas Gleixner.
This doesn't address userspace at all, only the kernel level wakeups and
kernel level ordering.
The additional locking added to the futex_wait function has no visible speed
impact, and only effects waiters which actual enter the kernel.
Signed-off-by: Daniel Walker <dwalker@mvista.com>
---
include/linux/sched.h | 4 ++++
kernel/futex.c | 41 +++++++++++++++++++++++++++++++++++++++++
kernel/sched.c | 1 +
3 files changed, 46 insertions(+)
Index: linux-2.6.25/include/linux/sched.h
===================================================================
--- linux-2.6.25.orig/include/linux/sched.h
+++ linux-2.6.25/include/linux/sched.h
@@ -1027,6 +1027,7 @@ struct sched_rt_entity {
enum lock_waiter_type {
MUTEX_WAITER = 1,
RT_MUTEX_WAITER,
+ FUTEX_WAITER
};
struct lock_waiter_state {
@@ -1034,6 +1035,7 @@ struct lock_waiter_state {
union {
struct mutex_waiter *mutex_blocked_on;
struct rt_mutex_waiter *rt_blocked_on;
+ union futex_key *futex_blocked_on;
};
};
@@ -1675,6 +1677,8 @@ static inline int rt_mutex_getprio(struc
# define rt_mutex_adjust_pi(p) do { } while (0)
#endif
+extern void futex_adjust_waiters(struct task_struct *p);
+
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
extern int task_nice(const struct task_struct *p);
Index: linux-2.6.25/kernel/futex.c
===================================================================
--- linux-2.6.25.orig/kernel/futex.c
+++ linux-2.6.25/kernel/futex.c
@@ -327,6 +327,38 @@ static int get_futex_value_locked(u32 *d
return ret ? -EFAULT : 0;
}
+void futex_adjust_waiters(struct task_struct *p)
+{
+
+ if (p->blocked_on) {
+ struct futex_hash_bucket *hb;
+ struct futex_q *q, *next;
+ union futex_key key;
+
+ spin_lock_irq(&p->pi_lock);
+ if (p->blocked_on && p->blocked_on->lock_type == FUTEX_WAITER) {
+ key = *p->blocked_on->futex_blocked_on;
+ spin_unlock_irq(&p->pi_lock);
+ } else {
+ spin_unlock_irq(&p->pi_lock);
+ return;
+ }
+
+ hb = hash_futex(&key);
+ spin_lock(&hb->lock);
+ plist_for_each_entry_safe(q, next, &hb->chain, list) {
+ if (match_futex(&q->key, &key) && q->task == p) {
+ int prio = min(p->normal_prio, MAX_RT_PRIO);
+ plist_del(&q->list, &hb->chain);
+ plist_node_init(&q->list, prio);
+ plist_add(&q->list, &hb->chain);
+ break;
+ }
+ }
+ spin_unlock(&hb->lock);
+ }
+}
+
/*
* Fault handling.
* if fshared is non NULL, current->mm->mmap_sem is already held
@@ -1159,6 +1191,8 @@ static int futex_wait(u32 __user *uaddr,
DECLARE_WAITQUEUE(wait, curr);
struct futex_hash_bucket *hb;
struct futex_q q;
+ struct lock_waiter_state blocked_on = {
+ .lock_type = FUTEX_WAITER, { .futex_blocked_on = &q.key } };
u32 uval;
int ret;
struct hrtimer_sleeper t;
@@ -1176,6 +1210,8 @@ static int futex_wait(u32 __user *uaddr,
if (unlikely(ret != 0))
goto out_release_sem;
+ set_blocked_on(current, &blocked_on);
+
hb = queue_lock(&q);
/*
@@ -1203,6 +1239,8 @@ static int futex_wait(u32 __user *uaddr,
if (unlikely(ret)) {
queue_unlock(&q, hb);
+ set_blocked_on(current, NULL);
+
/*
* If we would have faulted, release mmap_sem, fault it in and
* start all over again.
@@ -1276,6 +1314,8 @@ static int futex_wait(u32 __user *uaddr,
}
__set_current_state(TASK_RUNNING);
+ set_blocked_on(current, NULL);
+
/*
* NOTE: we don't remove ourselves from the waitqueue because
* we are the only user of it.
@@ -1310,6 +1350,7 @@ static int futex_wait(u32 __user *uaddr,
out_unlock_release_sem:
queue_unlock(&q, hb);
+ set_blocked_on(current, NULL);
out_release_sem:
futex_unlock_mm(fshared);
Index: linux-2.6.25/kernel/sched.c
===================================================================
--- linux-2.6.25.orig/kernel/sched.c
+++ linux-2.6.25/kernel/sched.c
@@ -5209,6 +5209,7 @@ recheck:
spin_unlock_irqrestore(&p->pi_lock, flags);
rt_mutex_adjust_pi(p);
+ futex_adjust_waiters(p);
return 0;
}
--
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 2/5] futex: update prio on requeue
2008-06-11 20:49 ` [PATCH 2/5] futex: update prio on requeue Daniel Walker
@ 2008-06-12 5:22 ` Peter Zijlstra
0 siblings, 0 replies; 23+ messages in thread
From: Peter Zijlstra @ 2008-06-12 5:22 UTC (permalink / raw)
To: Daniel Walker
Cc: linux-kernel, Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
On Wed, 2008-06-11 at 13:49 -0700, Daniel Walker wrote:
> plain text document attachment (futex-update-prio-on-requeue.patch)
> Since the priority may have changed, the requeue is a good place to update
> the priority since we're already deleting and adding to a new list.
>
> Signed-off-by: Daniel Walker <dwalker@mvista.com>
>
> ---
> kernel/futex.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> Index: linux-2.6.25/kernel/futex.c
> ===================================================================
> --- linux-2.6.25.orig/kernel/futex.c
> +++ linux-2.6.25/kernel/futex.c
> @@ -980,7 +980,10 @@ static int futex_requeue(u32 __user *uad
> * requeue.
> */
> if (likely(head1 != &hb2->chain)) {
> + struct task_struct *task = this->task;
> + int prio = min(task->normal_prio, MAX_RT_PRIO);
we want whitespace between var declarations and code
> plist_del(&this->list, &hb1->chain);
> + plist_node_init(&this->list, prio);
> plist_add(&this->list, &hb2->chain);
> this->lock_ptr = &hb2->lock;
> #ifdef CONFIG_DEBUG_PI_LIST
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 3/5] mutex debug: add generic blocked_on usage
2008-06-11 20:49 ` [PATCH 3/5] mutex debug: add generic blocked_on usage Daniel Walker
@ 2008-06-12 5:25 ` Peter Zijlstra
2008-06-12 13:21 ` Daniel Walker
0 siblings, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2008-06-12 5:25 UTC (permalink / raw)
To: Daniel Walker
Cc: linux-kernel, Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
On Wed, 2008-06-11 at 13:49 -0700, Daniel Walker wrote:
> plain text document attachment (blocked_on-mutex.patch)
> -#ifdef CONFIG_DEBUG_MUTEXES
> - /* mutex deadlock detection */
> - struct mutex_waiter *blocked_on;
> -#endif
> + /*
> + * Deadlock detection and priority inheritance handling,
> + * and any other out of line mutex operations
> + */
> + struct lock_waiter_state *blocked_on;
Now you grew task_struct unconditionally, how about
#if defined CONFIG_DEBUG_MUTEX || defined CONFIG_RT_MUTEXES || defined CONFIG_FUTEX
?
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-11 20:49 ` [PATCH 5/5] futex: fix miss ordered wakeups Daniel Walker
@ 2008-06-12 6:07 ` Peter Zijlstra
2008-06-12 13:22 ` Daniel Walker
2008-06-12 8:56 ` Thomas Gleixner
1 sibling, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2008-06-12 6:07 UTC (permalink / raw)
To: Daniel Walker
Cc: linux-kernel, Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
On Wed, 2008-06-11 at 13:49 -0700, Daniel Walker wrote:
> plain text document attachment (blocked_on-futex.patch)
I thought everybody and his dog didn't care for this?
> +void futex_adjust_waiters(struct task_struct *p)
> +{
> +
Seemingly extra whitespace ^
> + if (p->blocked_on) {
> + struct futex_hash_bucket *hb;
> + struct futex_q *q, *next;
> + union futex_key key;
> +
> + spin_lock_irq(&p->pi_lock);
> + if (p->blocked_on && p->blocked_on->lock_type == FUTEX_WAITER) {
> + key = *p->blocked_on->futex_blocked_on;
> + spin_unlock_irq(&p->pi_lock);
> + } else {
> + spin_unlock_irq(&p->pi_lock);
> + return;
> + }
> +
> + hb = hash_futex(&key);
> + spin_lock(&hb->lock);
> + plist_for_each_entry_safe(q, next, &hb->chain, list) {
> + if (match_futex(&q->key, &key) && q->task == p) {
> + int prio = min(p->normal_prio, MAX_RT_PRIO);
> + plist_del(&q->list, &hb->chain);
> + plist_node_init(&q->list, prio);
> + plist_add(&q->list, &hb->chain);
> + break;
> + }
> + }
> + spin_unlock(&hb->lock);
> + }
> +}
Also, if you write it like:
if (!p->blocked_on)
return
do_other_stuff
you loose one nesting level - which imho looks better.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-11 20:49 ` [PATCH 5/5] futex: fix miss ordered wakeups Daniel Walker
2008-06-12 6:07 ` Peter Zijlstra
@ 2008-06-12 8:56 ` Thomas Gleixner
2008-06-12 13:30 ` Daniel Walker
1 sibling, 1 reply; 23+ messages in thread
From: Thomas Gleixner @ 2008-06-12 8:56 UTC (permalink / raw)
To: Daniel Walker; +Cc: linux-kernel, Ulrich Drepper, Arjan van de Ven
On Wed, 11 Jun 2008, Daniel Walker wrote:
> Adds an additional function call to the sched_setscheduler to update the
> waiter position of a task if it happens to be waiting on a futex. This
> ensures that the kernel level waiter ordering is correctly maintained
> based on the changed priority of the task.
>
> I fixed the locking issue noticed by Thomas Gleixner.
>
> This doesn't address userspace at all, only the kernel level wakeups and
> kernel level ordering.
>
> The additional locking added to the futex_wait function has no visible speed
> impact, and only effects waiters which actual enter the kernel.
The additional locking is just broken and you did not even bother to
test your changes with lockdep.
Aside of this, these patches still add 100 lines of code to achieve
nothing - as dicussed when you previously submitted your changes.
Please stop wasting everyone's time with that.
Thanks,
tglx
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 3/5] mutex debug: add generic blocked_on usage
2008-06-12 5:25 ` Peter Zijlstra
@ 2008-06-12 13:21 ` Daniel Walker
0 siblings, 0 replies; 23+ messages in thread
From: Daniel Walker @ 2008-06-12 13:21 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
On Thu, 2008-06-12 at 07:25 +0200, Peter Zijlstra wrote:
> On Wed, 2008-06-11 at 13:49 -0700, Daniel Walker wrote:
> > plain text document attachment (blocked_on-mutex.patch)
>
> > -#ifdef CONFIG_DEBUG_MUTEXES
> > - /* mutex deadlock detection */
> > - struct mutex_waiter *blocked_on;
> > -#endif
> > + /*
> > + * Deadlock detection and priority inheritance handling,
> > + * and any other out of line mutex operations
> > + */
> > + struct lock_waiter_state *blocked_on;
>
> Now you grew task_struct unconditionally, how about
>
> #if defined CONFIG_DEBUG_MUTEX || defined CONFIG_RT_MUTEXES || defined CONFIG_FUTEX
>
> ?
Ok .. I didn't add it cause I wasn't convinced RT_MUTEXES was
optional .. There's a lot of PI futex related code that isn't ifdef'd
either..
Daniel
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 6:07 ` Peter Zijlstra
@ 2008-06-12 13:22 ` Daniel Walker
2008-06-12 13:57 ` Peter Zijlstra
0 siblings, 1 reply; 23+ messages in thread
From: Daniel Walker @ 2008-06-12 13:22 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
On Thu, 2008-06-12 at 08:07 +0200, Peter Zijlstra wrote:
> On Wed, 2008-06-11 at 13:49 -0700, Daniel Walker wrote:
> > plain text document attachment (blocked_on-futex.patch)
>
> I thought everybody and his dog didn't care for this?
Just Thomas ..
> Also, if you write it like:
>
> if (!p->blocked_on)
> return
>
> do_other_stuff
>
> you loose one nesting level - which imho looks better.
Yeah ..
Daniel
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 8:56 ` Thomas Gleixner
@ 2008-06-12 13:30 ` Daniel Walker
2008-06-12 13:33 ` Thomas Gleixner
0 siblings, 1 reply; 23+ messages in thread
From: Daniel Walker @ 2008-06-12 13:30 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: linux-kernel, Ulrich Drepper, Arjan van de Ven
On Thu, 2008-06-12 at 10:56 +0200, Thomas Gleixner wrote:
> On Wed, 11 Jun 2008, Daniel Walker wrote:
> > Adds an additional function call to the sched_setscheduler to update the
> > waiter position of a task if it happens to be waiting on a futex. This
> > ensures that the kernel level waiter ordering is correctly maintained
> > based on the changed priority of the task.
> >
> > I fixed the locking issue noticed by Thomas Gleixner.
> >
> > This doesn't address userspace at all, only the kernel level wakeups and
> > kernel level ordering.
> >
> > The additional locking added to the futex_wait function has no visible speed
> > impact, and only effects waiters which actual enter the kernel.
>
> The additional locking is just broken and you did not even bother to
> test your changes with lockdep.
I ran it with lockdep enabled , I didn't get any warnings..
> Aside of this, these patches still add 100 lines of code to achieve
> nothing - as dicussed when you previously submitted your changes.
>
> Please stop wasting everyone's time with that.
It achieves correct ordering of the futex waiters inside the kernel,
that is in fact _something_ ..
Daniel
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 13:30 ` Daniel Walker
@ 2008-06-12 13:33 ` Thomas Gleixner
2008-06-12 13:44 ` Daniel Walker
0 siblings, 1 reply; 23+ messages in thread
From: Thomas Gleixner @ 2008-06-12 13:33 UTC (permalink / raw)
To: Daniel Walker; +Cc: linux-kernel, Ulrich Drepper, Arjan van de Ven
On Thu, 12 Jun 2008, Daniel Walker wrote:
> On Thu, 2008-06-12 at 10:56 +0200, Thomas Gleixner wrote:
> > Please stop wasting everyone's time with that.
>
> It achieves correct ordering of the futex waiters inside the kernel,
> that is in fact _something_ ..
Yeah, just something _useless_
Thanks,
tglx
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 13:33 ` Thomas Gleixner
@ 2008-06-12 13:44 ` Daniel Walker
2008-06-12 15:24 ` Thomas Gleixner
0 siblings, 1 reply; 23+ messages in thread
From: Daniel Walker @ 2008-06-12 13:44 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: linux-kernel, Ulrich Drepper, Arjan van de Ven
On Thu, 2008-06-12 at 15:33 +0200, Thomas Gleixner wrote:
> On Thu, 12 Jun 2008, Daniel Walker wrote:
> > On Thu, 2008-06-12 at 10:56 +0200, Thomas Gleixner wrote:
> > > Please stop wasting everyone's time with that.
> >
> > It achieves correct ordering of the futex waiters inside the kernel,
> > that is in fact _something_ ..
>
> Yeah, just something _useless_
Just because you don't use it, doesn't make it useless .. At least
there's enough people asking for this that it warrants me writing it..
Daniel
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 13:22 ` Daniel Walker
@ 2008-06-12 13:57 ` Peter Zijlstra
2008-06-12 14:04 ` Daniel Walker
0 siblings, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2008-06-12 13:57 UTC (permalink / raw)
To: Daniel Walker
Cc: linux-kernel, Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
On Thu, 2008-06-12 at 06:22 -0700, Daniel Walker wrote:
> On Thu, 2008-06-12 at 08:07 +0200, Peter Zijlstra wrote:
> > On Wed, 2008-06-11 at 13:49 -0700, Daniel Walker wrote:
> > > plain text document attachment (blocked_on-futex.patch)
> >
> > I thought everybody and his dog didn't care for this?
>
> Just Thomas ..
And me, I really don't see the point in this, use PI futexes already if
you care for wakeup ordering.
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 13:57 ` Peter Zijlstra
@ 2008-06-12 14:04 ` Daniel Walker
0 siblings, 0 replies; 23+ messages in thread
From: Daniel Walker @ 2008-06-12 14:04 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, Ulrich Drepper, Thomas Gleixner, Arjan van de Ven
On Thu, 2008-06-12 at 15:57 +0200, Peter Zijlstra wrote:
> On Thu, 2008-06-12 at 06:22 -0700, Daniel Walker wrote:
> > On Thu, 2008-06-12 at 08:07 +0200, Peter Zijlstra wrote:
> > > On Wed, 2008-06-11 at 13:49 -0700, Daniel Walker wrote:
> > > > plain text document attachment (blocked_on-futex.patch)
> > >
> > > I thought everybody and his dog didn't care for this?
> >
> > Just Thomas ..
>
> And me, I really don't see the point in this, use PI futexes already if
> you care for wakeup ordering.
Does it make sense that we allow a priority based plist ordering, but we
don't keep the priorities up to date? That doesn't make sense to me.
Daniel
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 13:44 ` Daniel Walker
@ 2008-06-12 15:24 ` Thomas Gleixner
2008-06-12 15:56 ` Daniel Walker
0 siblings, 1 reply; 23+ messages in thread
From: Thomas Gleixner @ 2008-06-12 15:24 UTC (permalink / raw)
To: Daniel Walker; +Cc: LKML, Ulrich Drepper, Arjan van de Ven, Peter Zijlstra
On Thu, 12 Jun 2008, Daniel Walker wrote:
> On Thu, 2008-06-12 at 15:33 +0200, Thomas Gleixner wrote:
> > On Thu, 12 Jun 2008, Daniel Walker wrote:
> > > On Thu, 2008-06-12 at 10:56 +0200, Thomas Gleixner wrote:
> > > > Please stop wasting everyone's time with that.
> > >
> > > It achieves correct ordering of the futex waiters inside the kernel,
> > > that is in fact _something_ ..
> >
> > Yeah, just something _useless_
>
> Just because you don't use it, doesn't make it useless .. At least
> there's enough people asking for this that it warrants me writing it..
Which is not really a good technical reason to merge such a
patch. Your handwaving about "enough people" is just irrelevant. Are
you going to implement a root hole as well when enough people ask for
it ?
But there is also a Real Good technical reason why these patches are
going nowhere else than into /dev/null:
your approach of hijacking blocked_on is fundamentaly wrong as it
mixes kernel internal state with user space state.
It will break in preempt-rt at the point when this state is set and
the code blocks on a spinlock, which uses the rtmutex based sleeping
spinlock implementation and overwrites blocked_on.
If it can acquire the spinlock in the fast path without modifying
blocked_on it will cause trouble with the priority boosting chain
when a higher priority task becomes blocked on the spinlock.
If there would be a real good technical reason to fix this priority
ordering it could be done with less than 20 lines of code without
extra locking and wreckage waiting left and right, but I have not yet
seen a single convincing technical argument or a relevant use case
which might justify that.
Thanks,
tglx
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 15:24 ` Thomas Gleixner
@ 2008-06-12 15:56 ` Daniel Walker
2008-06-12 19:55 ` Thomas Gleixner
0 siblings, 1 reply; 23+ messages in thread
From: Daniel Walker @ 2008-06-12 15:56 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: LKML, Ulrich Drepper, Arjan van de Ven, Peter Zijlstra
On Thu, 2008-06-12 at 17:24 +0200, Thomas Gleixner wrote:
> > Just because you don't use it, doesn't make it useless .. At least
> > there's enough people asking for this that it warrants me writing it..
>
> Which is not really a good technical reason to merge such a
> patch. Your handwaving about "enough people" is just irrelevant. Are
> you going to implement a root hole as well when enough people ask for
> it ?
People asking for something is a very good reason to merge "features" ..
You can like or dislike implementations , but that doesn't reflect on
the nature of the feature.
> But there is also a Real Good technical reason why these patches are
> going nowhere else than into /dev/null:
>
> your approach of hijacking blocked_on is fundamentaly wrong as it
> mixes kernel internal state with user space state.
It mixes kernel state with kernel state, not to mention each state is
isolated from the others.
> It will break in preempt-rt at the point when this state is set and
> the code blocks on a spinlock, which uses the rtmutex based sleeping
> spinlock implementation and overwrites blocked_on.
That's an intersting point, however "preempt-rt" is out of tree, so it's
certainly not going be a reason to reject mainline changes.
> If there would be a real good technical reason to fix this priority
> ordering it could be done with less than 20 lines of code without
> extra locking and wreckage waiting left and right, but I have not yet
> seen a single convincing technical argument or a relevant use case
> which might justify that.
The technical reasons for including this are the same technical reasons
why we want the waiters queued in priority order .. It's a requirement
of posix, where many calls need the ordering and ultimately feed into
the futex interface. So we have every reason to do the ordering
correctly..
If you have a 20 line fix for this then great tell me what it is..
Daniel
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 15:56 ` Daniel Walker
@ 2008-06-12 19:55 ` Thomas Gleixner
2008-06-12 22:09 ` Daniel Walker
0 siblings, 1 reply; 23+ messages in thread
From: Thomas Gleixner @ 2008-06-12 19:55 UTC (permalink / raw)
To: Daniel Walker; +Cc: LKML, Ulrich Drepper, Arjan van de Ven, Peter Zijlstra
On Thu, 12 Jun 2008, Daniel Walker wrote:
>
> On Thu, 2008-06-12 at 17:24 +0200, Thomas Gleixner wrote:
> > > Just because you don't use it, doesn't make it useless .. At least
> > > there's enough people asking for this that it warrants me writing it..
> >
> > Which is not really a good technical reason to merge such a
> > patch. Your handwaving about "enough people" is just irrelevant. Are
> > you going to implement a root hole as well when enough people ask for
> > it ?
>
> People asking for something is a very good reason to merge "features" ..
> You can like or dislike implementations , but that doesn't reflect on
> the nature of the feature.
Again, I have not yet seen a single request aside of yours. Who is
asking for that and what is the rationale for their request?
> > But there is also a Real Good technical reason why these patches are
> > going nowhere else than into /dev/null:
> >
> > your approach of hijacking blocked_on is fundamentaly wrong as it
> > mixes kernel internal state with user space state.
>
> It mixes kernel state with kernel state, not to mention each state is
> isolated from the others.
Wrong. The task is blocked on the user space futex and not on an in
kernel concurrency control. What's the isolation, the enum or the
union ? LOL.
The blocked_on mechanism is restricted to kernel internal concurrency
controls and while mutex and rt_mutex could share a common storage,
the user space futex can not. That would preclude to ever use a
(rt_)mutex in the futex code.
The reason is simple: A task can only be blocked on one concurrency
control, but it can be blocked on a user space futex and later on an
in kernel concurrency control.
Just get it. These are different concepts and do not mix at all.
> > It will break in preempt-rt at the point when this state is set and
> > the code blocks on a spinlock, which uses the rtmutex based sleeping
> > spinlock implementation and overwrites blocked_on.
>
> That's an intersting point, however "preempt-rt" is out of tree, so it's
> certainly not going be a reason to reject mainline changes.
It is a perfectly good reason, simply because we know that rt is on
the way to mainline and it would be pretty stupid to merge a change
with a questionable technical merit which needs to be reverted and
overhauled in the foreseeable future. Other not yet mainline features
/ changes coordinate as well to avoid wreckage like that.
Also putting on my preempt-rt hat I just wonder about your brashly
impertinence to expect that others clean up the mess you create.
> > If there would be a real good technical reason to fix this priority
> > ordering it could be done with less than 20 lines of code without
> > extra locking and wreckage waiting left and right, but I have not yet
> > seen a single convincing technical argument or a relevant use case
> > which might justify that.
>
> The technical reasons for including this are the same technical reasons
> why we want the waiters queued in priority order .. It's a requirement
> of posix, where many calls need the ordering and ultimately feed into
> the futex interface. So we have every reason to do the ordering
> correctly..
Making a halfways correct thing a bit more correct is not going to
reach full correctness.
Also your interpretation of the POSIX requirement is very
questionable:
"If there are threads blocked on the mutex object referenced by mutex
when pthread_mutex_unlock() is called, resulting in the mutex
becoming available, the scheduling policy shall determine which
thread shall acquire the mutex."
So there is no explicit guarantee requirement AFAICT. "... the
scheduling policy shall determine ..." is a rather vague term which
has enough room for interpretation.
My interpretation is, whoever has/gets the CPU will get the mutex. So
why should I care about the priority order correctness, which is only
incorrect in a corner case. Especially in a corner case which is not a
hot path in any sane application:
It requires thread A to block on a futex together with other threads
and thread B to adjust the priority of thread A while it is
blocked.
This is definitely _NOT_ the common case in any application I'm aware
of.
If you think that this actually matters, then please stop your
handwaving and provide proof. I'm really keen to add the absurdities
of those use cases to my Ugly Code Museum.
Furthermore the rationale section says explicitely:
"The mutex functions and the particular default settings of the mutex
attributes have been motivated by the desire to not preclude fast,
inlined implementations of mutex locking and unlocking."
This is another confirmation that the default pthread_mutex
implementation aims for performance and not for correctness.
We have an PI implementation which provides full correctness, so there
is no need to add artificial correctness to something which is by
default incorrect.
> If you have a 20 line fix for this then great tell me what it is..
I might write the patch once I can see a real world use case where
this actually matters.
Thanks,
tglx
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 19:55 ` Thomas Gleixner
@ 2008-06-12 22:09 ` Daniel Walker
2008-06-12 22:43 ` Thomas Gleixner
0 siblings, 1 reply; 23+ messages in thread
From: Daniel Walker @ 2008-06-12 22:09 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: LKML, Ulrich Drepper, Arjan van de Ven, Peter Zijlstra
On Thu, 2008-06-12 at 21:55 +0200, Thomas Gleixner wrote:
> Also your interpretation of the POSIX requirement is very
> questionable:
>
> "If there are threads blocked on the mutex object referenced by mutex
> when pthread_mutex_unlock() is called, resulting in the mutex
> becoming available, the scheduling policy shall determine which
> thread shall acquire the mutex."
The key is "scheduling policy" .. When the mutex is un-blocked the next
task to run is the same as if the scheduler was selecting tasks from the
list of blocked tasks .. For Linux, that means the highest priority
tasks should be selected.. So it's no more acceptable for the scheduler
to priority invert some tasks than it is for the futex to do it.
Daniel
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 22:09 ` Daniel Walker
@ 2008-06-12 22:43 ` Thomas Gleixner
2008-06-12 23:06 ` Daniel Walker
0 siblings, 1 reply; 23+ messages in thread
From: Thomas Gleixner @ 2008-06-12 22:43 UTC (permalink / raw)
To: Daniel Walker; +Cc: LKML, Ulrich Drepper, Arjan van de Ven, Peter Zijlstra
On Thu, 12 Jun 2008, Daniel Walker wrote:
>
> On Thu, 2008-06-12 at 21:55 +0200, Thomas Gleixner wrote:
> > Also your interpretation of the POSIX requirement is very
> > questionable:
> >
> > "If there are threads blocked on the mutex object referenced by mutex
> > when pthread_mutex_unlock() is called, resulting in the mutex
> > becoming available, the scheduling policy shall determine which
> > thread shall acquire the mutex."
>
> The key is "scheduling policy" .. When the mutex is un-blocked the next
> task to run is the same as if the scheduler was selecting tasks from the
> list of blocked tasks .. For Linux, that means the highest priority
> tasks should be selected.. So it's no more acceptable for the scheduler
> to priority invert some tasks than it is for the futex to do it.
Sigh, when do you actually get a gripe that the default futex
implementation does not and can not guarantee that at all and therefor
your "correctness" patch is as important as a bag of rice which
toopled over in China ?
Provide answers to the real questions I asked more than once:
What's the real world problem ? Who cares about that - except you ?
Up to the point where you are actually able to come up with that
answers please direct your replies to /dev/null. That avoids that I
have to touch my .procmailrc.
Thanks,
tglx
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 22:43 ` Thomas Gleixner
@ 2008-06-12 23:06 ` Daniel Walker
2008-06-12 23:30 ` Thomas Gleixner
0 siblings, 1 reply; 23+ messages in thread
From: Daniel Walker @ 2008-06-12 23:06 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: LKML, Ulrich Drepper, Arjan van de Ven, Peter Zijlstra
On Fri, 2008-06-13 at 00:43 +0200, Thomas Gleixner wrote:
> On Thu, 12 Jun 2008, Daniel Walker wrote:
>
> >
> > On Thu, 2008-06-12 at 21:55 +0200, Thomas Gleixner wrote:
> > > Also your interpretation of the POSIX requirement is very
> > > questionable:
> > >
> > > "If there are threads blocked on the mutex object referenced by mutex
> > > when pthread_mutex_unlock() is called, resulting in the mutex
> > > becoming available, the scheduling policy shall determine which
> > > thread shall acquire the mutex."
> >
> > The key is "scheduling policy" .. When the mutex is un-blocked the next
> > task to run is the same as if the scheduler was selecting tasks from the
> > list of blocked tasks .. For Linux, that means the highest priority
> > tasks should be selected.. So it's no more acceptable for the scheduler
> > to priority invert some tasks than it is for the futex to do it.
>
> Sigh, when do you actually get a gripe that the default futex
> implementation does not and can not guarantee that at all and therefor
> your "correctness" patch is as important as a bag of rice which
> toopled over in China ?
Well, the last email I got from Arjan said this,
".. Don't look at the release path... look at the acquire path.
If a thread sees the futex is free, it'll take it, without even going
to the kernel at all."
And yes, I understand that fully.
> Provide answers to the real questions I asked more than once:
>
> What's the real world problem ? Who cares about that - except you ?
Any application which starts a thread, and later changes the priority
can observe the miss-ordering.. That's pretty common..
Daniel
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] futex: fix miss ordered wakeups
2008-06-12 23:06 ` Daniel Walker
@ 2008-06-12 23:30 ` Thomas Gleixner
0 siblings, 0 replies; 23+ messages in thread
From: Thomas Gleixner @ 2008-06-12 23:30 UTC (permalink / raw)
To: Daniel Walker; +Cc: LKML, Ulrich Drepper, Arjan van de Ven, Peter Zijlstra
On Thu, 12 Jun 2008, Daniel Walker wrote:
> On Fri, 2008-06-13 at 00:43 +0200, Thomas Gleixner wrote:
> > On Thu, 12 Jun 2008, Daniel Walker wrote:
> >
> > >
> > > On Thu, 2008-06-12 at 21:55 +0200, Thomas Gleixner wrote:
> > > > Also your interpretation of the POSIX requirement is very
> > > > questionable:
> > > >
> > > > "If there are threads blocked on the mutex object referenced by mutex
> > > > when pthread_mutex_unlock() is called, resulting in the mutex
> > > > becoming available, the scheduling policy shall determine which
> > > > thread shall acquire the mutex."
> > >
> > > The key is "scheduling policy" .. When the mutex is un-blocked the next
> > > task to run is the same as if the scheduler was selecting tasks from the
> > > list of blocked tasks .. For Linux, that means the highest priority
> > > tasks should be selected.. So it's no more acceptable for the scheduler
> > > to priority invert some tasks than it is for the futex to do it.
> >
> > Sigh, when do you actually get a gripe that the default futex
> > implementation does not and can not guarantee that at all and therefor
> > your "correctness" patch is as important as a bag of rice which
> > toopled over in China ?
>
> Well, the last email I got from Arjan said this,
>
> ".. Don't look at the release path... look at the acquire path.
> If a thread sees the futex is free, it'll take it, without even going
> to the kernel at all."
>
> And yes, I understand that fully.
Great. Case closed, nothing to argue about.
Thanks,
tglx
^ permalink raw reply [flat|nested] 23+ messages in thread
end of thread, other threads:[~2008-06-12 23:33 UTC | newest]
Thread overview: 23+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-11 20:49 [PATCH 1/5] futex: checkpatch cleanup Daniel Walker
2008-06-11 20:49 ` [PATCH 2/5] futex: update prio on requeue Daniel Walker
2008-06-12 5:22 ` Peter Zijlstra
2008-06-11 20:49 ` [PATCH 3/5] mutex debug: add generic blocked_on usage Daniel Walker
2008-06-12 5:25 ` Peter Zijlstra
2008-06-12 13:21 ` Daniel Walker
2008-06-11 20:49 ` [PATCH 4/5] rtmutex: " Daniel Walker
2008-06-11 20:49 ` [PATCH 5/5] futex: fix miss ordered wakeups Daniel Walker
2008-06-12 6:07 ` Peter Zijlstra
2008-06-12 13:22 ` Daniel Walker
2008-06-12 13:57 ` Peter Zijlstra
2008-06-12 14:04 ` Daniel Walker
2008-06-12 8:56 ` Thomas Gleixner
2008-06-12 13:30 ` Daniel Walker
2008-06-12 13:33 ` Thomas Gleixner
2008-06-12 13:44 ` Daniel Walker
2008-06-12 15:24 ` Thomas Gleixner
2008-06-12 15:56 ` Daniel Walker
2008-06-12 19:55 ` Thomas Gleixner
2008-06-12 22:09 ` Daniel Walker
2008-06-12 22:43 ` Thomas Gleixner
2008-06-12 23:06 ` Daniel Walker
2008-06-12 23:30 ` Thomas Gleixner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox