* [patch 3/4] pi-futex: Fix exit races and locking problems
2007-06-08 0:29 [patch 0/4] PI-Futex bugfixes and cleanups Thomas Gleixner
2007-06-08 0:29 ` [patch 1/4] rt-mutex: Fix stale return value Thomas Gleixner
2007-06-08 0:29 ` [patch 2/4] rt-mutex: Fix chain walk early wakeup bug Thomas Gleixner
@ 2007-06-08 0:29 ` Thomas Gleixner
2007-06-08 7:16 ` Ingo Molnar
2007-06-08 0:29 ` [patch 4/4] FUTEX: Tidy up the code Thomas Gleixner
3 siblings, 1 reply; 9+ messages in thread
From: Thomas Gleixner @ 2007-06-08 0:29 UTC (permalink / raw)
To: Andrew Morton
Cc: LKML, Ingo Molnar, Steven Rostedt, Alexey Kuznetsov,
Ulrich Drepper
[-- Attachment #1: pi-futex-fixes.patch --]
[-- Type: text/plain, Size: 18652 bytes --]
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
1. New entries can be added to tsk->pi_state_list after task completed
exit_pi_state_list(). The result is memory leakage and deadlocks.
2. handle_mm_fault() is called under spinlock. The result is obvious.
3. results in self-inflicted deadlock inside glibc.
Sometimes futex_lock_pi returns -ESRCH, when it is not expected
and glibc enters to for(;;) sleep() to simulate deadlock. This problem
is quite obvious and I think the patch is right. Though it looks like
each "if" in futex_lock_pi() got some stupid special case "else if". :-)
4. sometimes futex_lock_pi() returns -EDEADLK,
when nobody has the lock. The reason is also obvious (see comment
in the patch), but correct fix is far beyond my comprehension.
I guess someone already saw this, the chunk:
if (rt_mutex_trylock(&q.pi_state->pi_mutex))
ret = 0;
is obviously from the same opera. But it does not work, because the
rtmutex is really taken at this point: wake_futex_pi() of previous
owner reassigned it to us. My fix works. But it looks very stupid.
I would think about removal of shift of ownership in wake_futex_pi()
and making all the work in context of process taking lock.
From: Thomas Gleixner <tglx@linutronix.de>
Fix 1) Avoid the tasklist lock variant of the exit race fix by adding
an additional state transition to the exit code.
This fixes also the issue, when a task with recursive segfaults
is not able to release the futexes.
Fix 2) Cleanup the lookup_pi_state() failure path and solve the -ESRCH
problem finally.
Fix 3) Solve the fixup_pi_state_owner() problem which needs to do the fixup
in the lock protected section by using the in_atomic userspace access
functions.
This removes also the ugly lock drop / unqueue inside of fixup_pi_state()
Fix 4) Fix a stale lock in the error path of futex_wake_pi()
Added some error checks for verification.
The -EDEADLK problem is solved by the rtmutex fixups.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/sched.h | 1
kernel/exit.c | 24 ++++
kernel/futex.c | 271 +++++++++++++++++++++++++++++---------------------
3 files changed, 184 insertions(+), 112 deletions(-)
Index: linux-2.6.22-rc4/kernel/futex.c
===================================================================
--- linux-2.6.22-rc4.orig/kernel/futex.c 2007-06-08 01:42:29.000000000 +0200
+++ linux-2.6.22-rc4/kernel/futex.c 2007-06-08 01:46:59.000000000 +0200
@@ -430,10 +430,6 @@ static struct task_struct * futex_find_g
p = NULL;
goto out_unlock;
}
- if (p->exit_state != 0) {
- p = NULL;
- goto out_unlock;
- }
get_task_struct(p);
out_unlock:
rcu_read_unlock();
@@ -502,7 +498,7 @@ lookup_pi_state(u32 uval, struct futex_h
struct futex_q *this, *next;
struct plist_head *head;
struct task_struct *p;
- pid_t pid;
+ pid_t pid = uval & FUTEX_TID_MASK;
head = &hb->chain;
@@ -520,6 +516,8 @@ lookup_pi_state(u32 uval, struct futex_h
return -EINVAL;
WARN_ON(!atomic_read(&pi_state->refcount));
+ WARN_ON(pid && pi_state->owner &&
+ pi_state->owner->pid != pid);
atomic_inc(&pi_state->refcount);
*ps = pi_state;
@@ -530,15 +528,33 @@ lookup_pi_state(u32 uval, struct futex_h
/*
* We are the first waiter - try to look up the real owner and attach
- * the new pi_state to it, but bail out when the owner died bit is set
- * and TID = 0:
+ * the new pi_state to it, but bail out when TID = 0
*/
- pid = uval & FUTEX_TID_MASK;
- if (!pid && (uval & FUTEX_OWNER_DIED))
+ if (!pid)
return -ESRCH;
p = futex_find_get_task(pid);
- if (!p)
- return -ESRCH;
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ /*
+ * We need to look at the task state flags to figure out,
+ * whether the task is exiting. To protect against the do_exit
+ * change of the task flags, we do this protected by
+ * p->pi_lock:
+ */
+ spin_lock_irq(&p->pi_lock);
+ if (unlikely(p->flags & PF_EXITING)) {
+ /*
+ * The task is on the way out. When PF_EXITPIDONE is
+ * set, we know that the task has finished the
+ * cleanup:
+ */
+ int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+
+ spin_unlock_irq(&p->pi_lock);
+ put_task_struct(p);
+ return ret;
+ }
pi_state = alloc_pi_state();
@@ -551,7 +567,6 @@ lookup_pi_state(u32 uval, struct futex_h
/* Store the key for possible exit cleanups: */
pi_state->key = *key;
- spin_lock_irq(&p->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &p->pi_state_list);
pi_state->owner = p;
@@ -618,6 +633,8 @@ static int wake_futex_pi(u32 __user *uad
* preserve the owner died bit.)
*/
if (!(uval & FUTEX_OWNER_DIED)) {
+ int ret = 0;
+
newval = FUTEX_WAITERS | new_owner->pid;
/* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
newval |= (uval & FUTEX_WAITER_REQUEUED);
@@ -625,10 +642,15 @@ static int wake_futex_pi(u32 __user *uad
pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
pagefault_enable();
+
if (curval == -EFAULT)
- return -EFAULT;
+ ret = -EFAULT;
if (curval != uval)
- return -EINVAL;
+ ret = -EINVAL;
+ if (ret) {
+ spin_unlock(&pi_state->pi_mutex.wait_lock);
+ return ret;
+ }
}
spin_lock_irq(&pi_state->owner->pi_lock);
@@ -1174,7 +1196,7 @@ static int futex_requeue(u32 __user *uad
#ifdef CONFIG_DEBUG_PI_LIST
this->list.plist.lock = &hb2->lock;
#endif
- }
+ }
this->key = key2;
get_futex_key_refs(&key2);
drop_count++;
@@ -1326,12 +1348,10 @@ static void unqueue_me_pi(struct futex_q
/*
* Fixup the pi_state owner with current.
*
- * The cur->mm semaphore must be held, it is released at return of this
- * function.
+ * Must be called with hash bucket lock held and mm->sem held for non
+ * private futexes.
*/
-static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
- struct futex_q *q,
- struct futex_hash_bucket *hb,
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
struct task_struct *curr)
{
u32 newtid = curr->pid | FUTEX_WAITERS;
@@ -1355,23 +1375,24 @@ static int fixup_pi_state_owner(u32 __us
list_add(&pi_state->list, &curr->pi_state_list);
spin_unlock_irq(&curr->pi_lock);
- /* Unqueue and drop the lock */
- unqueue_me_pi(q);
- if (fshared)
- up_read(fshared);
/*
* We own it, so we have to replace the pending owner
* TID. This must be atomic as we have preserve the
* owner died bit here.
*/
- ret = get_user(uval, uaddr);
+ ret = get_futex_value_locked(&uval, uaddr);
+
while (!ret) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
newval |= (uval & FUTEX_WAITER_REQUEUED);
+
+ pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr,
uval, newval);
+ pagefault_enable();
+
if (curval == -EFAULT)
- ret = -EFAULT;
+ ret = -EFAULT;
if (curval == uval)
break;
uval = curval;
@@ -1553,10 +1574,7 @@ static int futex_wait(u32 __user *uaddr,
*/
uaddr = q.pi_state->key.uaddr;
- /* mmap_sem and hash_bucket lock are unlocked at
- return of this function */
- ret = fixup_pi_state_owner(uaddr, fshared,
- &q, hb, curr);
+ ret = fixup_pi_state_owner(uaddr, &q, curr);
} else {
/*
* Catch the rare case, where the lock was released
@@ -1567,12 +1585,13 @@ static int futex_wait(u32 __user *uaddr,
if (rt_mutex_trylock(&q.pi_state->pi_mutex))
ret = 0;
}
- /* Unqueue and drop the lock */
- unqueue_me_pi(&q);
- if (fshared)
- up_read(fshared);
}
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(&q);
+ if (fshared)
+ up_read(fshared);
+
debug_rt_mutex_free_waiter(&q.waiter);
return ret;
@@ -1688,7 +1707,7 @@ static int futex_lock_pi(u32 __user *uad
struct futex_hash_bucket *hb;
u32 uval, newval, curval;
struct futex_q q;
- int ret, lock_held, attempt = 0;
+ int ret, lock_taken, ownerdied = 0, attempt = 0;
if (refill_pi_state_cache())
return -ENOMEM;
@@ -1709,10 +1728,11 @@ static int futex_lock_pi(u32 __user *uad
if (unlikely(ret != 0))
goto out_release_sem;
+ retry_unlocked:
hb = queue_lock(&q, -1, NULL);
retry_locked:
- lock_held = 0;
+ ret = lock_taken = 0;
/*
* To avoid races, we attempt to take the lock here again
@@ -1728,43 +1748,44 @@ static int futex_lock_pi(u32 __user *uad
if (unlikely(curval == -EFAULT))
goto uaddr_faulted;
- /* We own the lock already */
+ /*
+ * Detect deadlocks. In case of REQUEUE_PI this is a valid
+ * situation and we return success to user space.
+ */
if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
- if (!detect && 0)
- force_sig(SIGKILL, current);
- /*
- * Normally, this check is done in user space.
- * In case of requeue, the owner may attempt to lock this futex,
- * even if the ownership has already been given by the previous
- * waker.
- * In the usual case, this is a case of deadlock, but not in case
- * of REQUEUE_PI.
- */
if (!(curval & FUTEX_WAITER_REQUEUED))
ret = -EDEADLK;
goto out_unlock_release_sem;
}
/*
- * Surprise - we got the lock. Just return
- * to userspace:
+ * Surprise - we got the lock. Just return to userspace:
*/
if (unlikely(!curval))
goto out_unlock_release_sem;
uval = curval;
+
/*
- * In case of a requeue, check if there already is an owner
- * If not, just take the futex.
+ * Set the WAITERS flag, so the owner will know it has someone
+ * to wake at next unlock
*/
- if ((curval & FUTEX_WAITER_REQUEUED) && !(curval & FUTEX_TID_MASK)) {
- /* set current as futex owner */
- newval = curval | current->pid;
- lock_held = 1;
- } else
- /* Set the WAITERS flag, so the owner will know it has someone
- to wake at next unlock */
- newval = curval | FUTEX_WAITERS;
+ newval = curval | FUTEX_WAITERS;
+
+ /*
+ * There are two cases, where a futex might have no owner (the
+ * owner TID is 0): OWNER_DIED or REQUEUE. We take over the
+ * futex in this case. We also do an unconditional take over,
+ * when the owner of the futex died.
+ *
+ * This is safe as we are protected by the hash bucket lock !
+ */
+ if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
+ /* Keep the OWNER_DIED and REQUEUE bits */
+ newval = (curval & ~FUTEX_TID_MASK) | current->pid;
+ ownerdied = 0;
+ lock_taken = 1;
+ }
pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -1775,8 +1796,13 @@ static int futex_lock_pi(u32 __user *uad
if (unlikely(curval != uval))
goto retry_locked;
- if (lock_held) {
- set_pi_futex_owner(hb, &q.key, curr);
+ /*
+ * We took the lock due to requeue or owner died take over.
+ */
+ if (unlikely(lock_taken)) {
+ /* For requeue we need to fixup the pi_futex */
+ if (curval & FUTEX_WAITER_REQUEUED)
+ set_pi_futex_owner(hb, &q.key, curr);
goto out_unlock_release_sem;
}
@@ -1787,34 +1813,40 @@ static int futex_lock_pi(u32 __user *uad
ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
if (unlikely(ret)) {
- /*
- * There were no waiters and the owner task lookup
- * failed. When the OWNER_DIED bit is set, then we
- * know that this is a robust futex and we actually
- * take the lock. This is safe as we are protected by
- * the hash bucket lock. We also set the waiters bit
- * unconditionally here, to simplify glibc handling of
- * multiple tasks racing to acquire the lock and
- * cleanup the problems which were left by the dead
- * owner.
- */
- if (curval & FUTEX_OWNER_DIED) {
- uval = newval;
- newval = current->pid |
- FUTEX_OWNER_DIED | FUTEX_WAITERS;
-
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
- uval, newval);
- pagefault_enable();
+ switch (ret) {
- if (unlikely(curval == -EFAULT))
+ case -EAGAIN:
+ /*
+ * Task is exiting and we just wait for the
+ * exit to complete.
+ */
+ queue_unlock(&q, hb);
+ if (fshared)
+ up_read(fshared);
+ cond_resched();
+ goto retry;
+
+ case -ESRCH:
+ /*
+ * No owner found for this futex. Check if the
+ * OWNER_DIED bit is set to figure out whether
+ * this is a robust futex or not.
+ */
+ if (get_futex_value_locked(&curval, uaddr))
goto uaddr_faulted;
- if (unlikely(curval != uval))
+
+ /*
+ * We simply start over in case of a robust
+ * futex. The code above will take the futex
+ * and return happy.
+ */
+ if (curval & FUTEX_OWNER_DIED) {
+ ownerdied = 1;
goto retry_locked;
- ret = 0;
+ }
+ default:
+ goto out_unlock_release_sem;
}
- goto out_unlock_release_sem;
}
/*
@@ -1845,31 +1877,42 @@ static int futex_lock_pi(u32 __user *uad
down_read(fshared);
spin_lock(q.lock_ptr);
- /*
- * Got the lock. We might not be the anticipated owner if we
- * did a lock-steal - fix up the PI-state in that case.
- */
- if (!ret && q.pi_state->owner != curr)
- /* mmap_sem is unlocked at return of this function */
- ret = fixup_pi_state_owner(uaddr, fshared, &q, hb, curr);
- else {
+ if (!ret) {
+ /*
+ * Got the lock. We might not be the anticipated owner
+ * if we did a lock-steal - fix up the PI-state in
+ * that case:
+ */
+ if (q.pi_state->owner != curr)
+ ret = fixup_pi_state_owner(uaddr, &q, curr);
+ } else {
/*
* Catch the rare case, where the lock was released
- * when we were on the way back before we locked
- * the hash bucket.
+ * when we were on the way back before we locked the
+ * hash bucket.
*/
- if (ret && q.pi_state->owner == curr) {
- if (rt_mutex_trylock(&q.pi_state->pi_mutex))
- ret = 0;
+ if (q.pi_state->owner == curr &&
+ rt_mutex_trylock(&q.pi_state->pi_mutex)) {
+ ret = 0;
+ } else {
+ /*
+ * Paranoia check. If we did not take the lock
+ * in the trylock above, then we should not be
+ * the owner of the rtmutex, neither the real
+ * nor the pending one:
+ */
+ if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
+ printk(KERN_ERR "futex_lock_pi: ret = %d "
+ "pi-mutex: %p pi-state %p\n", ret,
+ q.pi_state->pi_mutex.owner,
+ q.pi_state->owner);
}
- /* Unqueue and drop the lock */
- unqueue_me_pi(&q);
- if (fshared)
- up_read(fshared);
}
- if (!detect && ret == -EDEADLK && 0)
- force_sig(SIGKILL, current);
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(&q);
+ if (fshared)
+ up_read(fshared);
return ret != -EINTR ? ret : -ERESTARTNOINTR;
@@ -1887,16 +1930,19 @@ static int futex_lock_pi(u32 __user *uad
* non-atomically. Therefore, if get_user below is not
* enough, we need to handle the fault ourselves, while
* still holding the mmap_sem.
+ *
+ * ... and hb->lock. :-) --ANK
*/
+ queue_unlock(&q, hb);
+
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, fshared,
attempt);
if (ret)
- goto out_unlock_release_sem;
- goto retry_locked;
+ goto out_release_sem;
+ goto retry_unlocked;
}
- queue_unlock(&q, hb);
if (fshared)
up_read(fshared);
@@ -1940,9 +1986,9 @@ retry:
goto out;
hb = hash_futex(&key);
+retry_unlocked:
spin_lock(&hb->lock);
-retry_locked:
/*
* To avoid races, try to do the TID -> 0 atomic transition
* again. If it succeeds then we can return without waking
@@ -2005,16 +2051,19 @@ pi_faulted:
* non-atomically. Therefore, if get_user below is not
* enough, we need to handle the fault ourselves, while
* still holding the mmap_sem.
+ *
+ * ... and hb->lock. --ANK
*/
+ spin_unlock(&hb->lock);
+
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, fshared,
attempt);
if (ret)
- goto out_unlock;
- goto retry_locked;
+ goto out;
+ goto retry_unlocked;
}
- spin_unlock(&hb->lock);
if (fshared)
up_read(fshared);
Index: linux-2.6.22-rc4/include/linux/sched.h
===================================================================
--- linux-2.6.22-rc4.orig/include/linux/sched.h 2007-06-08 01:42:29.000000000 +0200
+++ linux-2.6.22-rc4/include/linux/sched.h 2007-06-08 01:45:13.000000000 +0200
@@ -1162,6 +1162,7 @@ static inline void put_task_struct(struc
/* Not implemented yet, only for 486*/
#define PF_STARTING 0x00000002 /* being created */
#define PF_EXITING 0x00000004 /* getting shut down */
+#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
#define PF_DUMPCORE 0x00000200 /* dumped core */
Index: linux-2.6.22-rc4/kernel/exit.c
===================================================================
--- linux-2.6.22-rc4.orig/kernel/exit.c 2007-06-08 01:42:29.000000000 +0200
+++ linux-2.6.22-rc4/kernel/exit.c 2007-06-08 01:45:13.000000000 +0200
@@ -892,13 +892,29 @@ fastcall NORET_TYPE void do_exit(long co
if (unlikely(tsk->flags & PF_EXITING)) {
printk(KERN_ALERT
"Fixing recursive fault but reboot is needed!\n");
+ /*
+ * We can do this unlocked here. The futex code uses
+ * this flag just to verify whether the pi state
+ * cleanup has been done or not. In the worst case it
+ * loops once more. We pretend that the cleanup was
+ * done as there is no way to return. Either the
+ * OWNER_DIED bit is set by now or we push the blocked
+ * task into the wait for ever nirwana as well.
+ */
+ tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
exit_io_context();
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}
+ /*
+ * tsk->flags are checked in the futex code to protect against
+ * an exiting task cleaning up the robust pi futexes.
+ */
+ spin_lock_irq(&tsk->pi_lock);
tsk->flags |= PF_EXITING;
+ spin_unlock_irq(&tsk->pi_lock);
if (unlikely(in_atomic()))
printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -912,7 +928,7 @@ fastcall NORET_TYPE void do_exit(long co
}
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
- hrtimer_cancel(&tsk->signal->real_timer);
+ hrtimer_cancel(&tsk->signal->real_timer);
exit_itimers(tsk->signal);
}
acct_collect(code, group_dead);
@@ -965,6 +981,12 @@ fastcall NORET_TYPE void do_exit(long co
* Make sure we are holding no locks:
*/
debug_check_no_locks_held(tsk);
+ /*
+ * We can do this unlocked here. The futex code uses this flag
+ * just to verify whether the pi state cleanup has been done
+ * or not. In the worst case it loops once more.
+ */
+ tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
exit_io_context();
--
^ permalink raw reply [flat|nested] 9+ messages in thread* [patch 4/4] FUTEX: Tidy up the code
2007-06-08 0:29 [patch 0/4] PI-Futex bugfixes and cleanups Thomas Gleixner
` (2 preceding siblings ...)
2007-06-08 0:29 ` [patch 3/4] pi-futex: Fix exit races and locking problems Thomas Gleixner
@ 2007-06-08 0:29 ` Thomas Gleixner
2007-06-08 7:10 ` Ingo Molnar
3 siblings, 1 reply; 9+ messages in thread
From: Thomas Gleixner @ 2007-06-08 0:29 UTC (permalink / raw)
To: Andrew Morton; +Cc: LKML, Ingo Molnar, Steven Rostedt, Eric Dumazet
[-- Attachment #1: futex-tidy-up.patch --]
[-- Type: text/plain, Size: 14535 bytes --]
The recent PRIVATE and REQUEUE_PI changes to the futex code made it hard to read.
Tidy it up.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/futex.c | 192 ++++++++++++++++++++----------------------------
kernel/rtmutex-debug.c | 6 -
kernel/rtmutex.c | 6 -
kernel/rtmutex_common.h | 7 +
4 files changed, 88 insertions(+), 123 deletions(-)
Index: linux-2.6.22-rc4/kernel/rtmutex-debug.c
===================================================================
--- linux-2.6.22-rc4.orig/kernel/rtmutex-debug.c 2007-06-08 01:42:29.000000000 +0200
+++ linux-2.6.22-rc4/kernel/rtmutex-debug.c 2007-06-08 01:48:02.000000000 +0200
@@ -29,12 +29,6 @@
#include "rtmutex_common.h"
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-# include "rtmutex-debug.h"
-#else
-# include "rtmutex.h"
-#endif
-
# define TRACE_WARN_ON(x) WARN_ON(x)
# define TRACE_BUG_ON(x) BUG_ON(x)
Index: linux-2.6.22-rc4/kernel/rtmutex.c
===================================================================
--- linux-2.6.22-rc4.orig/kernel/rtmutex.c 2007-06-08 01:44:54.000000000 +0200
+++ linux-2.6.22-rc4/kernel/rtmutex.c 2007-06-08 01:48:02.000000000 +0200
@@ -17,12 +17,6 @@
#include "rtmutex_common.h"
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-# include "rtmutex-debug.h"
-#else
-# include "rtmutex.h"
-#endif
-
/*
* lock->owner state tracking:
*
Index: linux-2.6.22-rc4/kernel/rtmutex_common.h
===================================================================
--- linux-2.6.22-rc4.orig/kernel/rtmutex_common.h 2007-06-08 01:42:29.000000000 +0200
+++ linux-2.6.22-rc4/kernel/rtmutex_common.h 2007-06-08 01:48:02.000000000 +0200
@@ -154,4 +154,11 @@ extern int rt_mutex_adjust_prio_chain(st
struct task_struct *top_task);
extern void remove_waiter(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter);
+
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+# include "rtmutex-debug.h"
+#else
+# include "rtmutex.h"
+#endif
+
#endif
Index: linux-2.6.22-rc4/kernel/futex.c
===================================================================
--- linux-2.6.22-rc4.orig/kernel/futex.c 2007-06-08 01:46:59.000000000 +0200
+++ linux-2.6.22-rc4/kernel/futex.c 2007-06-08 01:48:02.000000000 +0200
@@ -56,12 +56,6 @@
#include "rtmutex_common.h"
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-# include "rtmutex-debug.h"
-#else
-# include "rtmutex.h"
-#endif
-
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
/*
@@ -133,6 +127,24 @@ static struct futex_hash_bucket futex_qu
static struct vfsmount *futex_mnt;
/*
+ * Take mm->mmap_sem, when futex is shared
+ */
+static inline void futex_lock_mm(struct rw_semaphore *fshared)
+{
+ if (fshared)
+ down_read(fshared);
+}
+
+/*
+ * Release mm->mmap_sem, when the futex is shared
+ */
+static inline void futex_unlock_mm(struct rw_semaphore *fshared)
+{
+ if (fshared)
+ up_read(fshared);
+}
+
+/*
* We hash on the keys returned from get_futex_key (see below).
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -302,7 +314,18 @@ void drop_futex_key_refs(union futex_key
}
EXPORT_SYMBOL_GPL(drop_futex_key_refs);
-static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
+static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+{
+ u32 curval;
+
+ pagefault_disable();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ pagefault_enable();
+
+ return curval;
+}
+
+static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
int ret;
@@ -424,14 +447,12 @@ static struct task_struct * futex_find_g
rcu_read_lock();
p = find_task_by_pid(pid);
- if (!p)
- goto out_unlock;
- if ((current->euid != p->euid) && (current->euid != p->uid)) {
- p = NULL;
- goto out_unlock;
- }
- get_task_struct(p);
-out_unlock:
+
+ if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
+ p = ERR_PTR(-ESRCH);
+ else
+ get_task_struct(p);
+
rcu_read_unlock();
return p;
@@ -639,9 +660,7 @@ static int wake_futex_pi(u32 __user *uad
/* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
newval |= (uval & FUTEX_WAITER_REQUEUED);
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
if (curval == -EFAULT)
ret = -EFAULT;
@@ -678,9 +697,7 @@ static int unlock_futex_pi(u32 __user *u
* There is no waiter, so we unlock the futex. The owner died
* bit has not to be preserved here. We are the owner:
*/
- pagefault_disable();
- oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
- pagefault_enable();
+ oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
if (oldval == -EFAULT)
return oldval;
@@ -719,8 +736,7 @@ static int futex_wake(u32 __user *uaddr,
union futex_key key;
int ret;
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0))
@@ -744,8 +760,7 @@ static int futex_wake(u32 __user *uaddr,
spin_unlock(&hb->lock);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
@@ -780,9 +795,7 @@ retry:
uval = curval;
newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
if (unlikely(curval == -EFAULT))
return -EFAULT;
@@ -830,11 +843,7 @@ static int futex_requeue_pi(u32 __user *
return -ENOMEM;
retry:
- /*
- * First take all the futex related locks:
- */
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
@@ -862,8 +871,7 @@ retry:
* If we would have faulted, release mmap_sem, fault
* it in and start all over again.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(curval, uaddr1);
@@ -997,8 +1005,7 @@ out_unlock:
drop_futex_key_refs(&key1);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
@@ -1018,8 +1025,7 @@ futex_wake_op(u32 __user *uaddr1, struct
int ret, op_ret, attempt = 0;
retryfull:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
@@ -1065,7 +1071,7 @@ retry:
*/
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr2,
- fshared, attempt);
+ fshared, attempt);
if (ret)
goto out;
goto retry;
@@ -1075,8 +1081,7 @@ retry:
* If we would have faulted, release mmap_sem,
* fault it in and start all over again.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(dummy, uaddr2);
if (ret)
@@ -1113,8 +1118,7 @@ retry:
if (hb1 != hb2)
spin_unlock(&hb2->lock);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
@@ -1133,8 +1137,7 @@ static int futex_requeue(u32 __user *uad
int ret, drop_count = 0;
retry:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
@@ -1162,8 +1165,7 @@ static int futex_requeue(u32 __user *uad
* If we would have faulted, release mmap_sem, fault
* it in and start all over again.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(curval, uaddr1);
@@ -1216,8 +1218,7 @@ out_unlock:
drop_futex_key_refs(&key1);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
@@ -1386,10 +1387,7 @@ static int fixup_pi_state_owner(u32 __us
newval = (uval & FUTEX_OWNER_DIED) | newtid;
newval |= (uval & FUTEX_WAITER_REQUEUED);
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
- uval, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
if (curval == -EFAULT)
ret = -EFAULT;
@@ -1407,6 +1405,7 @@ static int fixup_pi_state_owner(u32 __us
#define ARG3_SHARED 1
static long futex_wait_restart(struct restart_block *restart);
+
static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
u32 val, ktime_t *abs_time)
{
@@ -1421,8 +1420,7 @@ static int futex_wait(u32 __user *uaddr,
q.pi_state = NULL;
retry:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
@@ -1459,8 +1457,7 @@ static int futex_wait(u32 __user *uaddr,
* If we would have faulted, release mmap_sem, fault it in and
* start all over again.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(uval, uaddr);
@@ -1487,8 +1484,7 @@ static int futex_wait(u32 __user *uaddr,
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
/*
* There might have been scheduling since the queue_me(), as we
@@ -1548,9 +1544,9 @@ static int futex_wait(u32 __user *uaddr,
struct rt_mutex *lock = &q.pi_state->pi_mutex;
spin_lock(&lock->wait_lock);
- if (unlikely(q.waiter.task)) {
+ if (unlikely(q.waiter.task))
remove_waiter(lock, &q.waiter);
- }
+
spin_unlock(&lock->wait_lock);
if (rem)
@@ -1558,8 +1554,7 @@ static int futex_wait(u32 __user *uaddr,
else
ret = rt_mutex_timed_lock(lock, to, 1);
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
spin_lock(q.lock_ptr);
/*
@@ -1589,8 +1584,7 @@ static int futex_wait(u32 __user *uaddr,
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
debug_rt_mutex_free_waiter(&q.waiter);
@@ -1628,8 +1622,7 @@ static int futex_wait(u32 __user *uaddr,
queue_unlock(&q, hb);
out_release_sem:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
@@ -1721,8 +1714,7 @@ static int futex_lock_pi(u32 __user *uad
q.pi_state = NULL;
retry:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
@@ -1741,9 +1733,7 @@ static int futex_lock_pi(u32 __user *uad
*/
newval = current->pid;
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
if (unlikely(curval == -EFAULT))
goto uaddr_faulted;
@@ -1787,9 +1777,7 @@ static int futex_lock_pi(u32 __user *uad
lock_taken = 1;
}
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
if (unlikely(curval == -EFAULT))
goto uaddr_faulted;
@@ -1821,8 +1809,7 @@ static int futex_lock_pi(u32 __user *uad
* exit to complete.
*/
queue_unlock(&q, hb);
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
cond_resched();
goto retry;
@@ -1858,8 +1845,7 @@ static int futex_lock_pi(u32 __user *uad
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
WARN_ON(!q.pi_state);
/*
@@ -1873,8 +1859,7 @@ static int futex_lock_pi(u32 __user *uad
ret = ret ? 0 : -EWOULDBLOCK;
}
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
spin_lock(q.lock_ptr);
if (!ret) {
@@ -1911,8 +1896,7 @@ static int futex_lock_pi(u32 __user *uad
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret != -EINTR ? ret : -ERESTARTNOINTR;
@@ -1920,8 +1904,7 @@ static int futex_lock_pi(u32 __user *uad
queue_unlock(&q, hb);
out_release_sem:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
uaddr_faulted:
@@ -1943,8 +1926,7 @@ static int futex_lock_pi(u32 __user *uad
goto retry_unlocked;
}
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
@@ -1975,11 +1957,8 @@ retry:
*/
if ((uval & FUTEX_TID_MASK) != current->pid)
return -EPERM;
- /*
- * First take all the futex related locks:
- */
- if (fshared)
- down_read(fshared);
+
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0))
@@ -1994,11 +1973,8 @@ retry_unlocked:
* again. If it succeeds then we can return without waking
* anyone else up:
*/
- if (!(uval & FUTEX_OWNER_DIED)) {
- pagefault_disable();
- uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
- pagefault_enable();
- }
+ if (!(uval & FUTEX_OWNER_DIED))
+ uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0);
if (unlikely(uval == -EFAULT))
goto pi_faulted;
@@ -2040,9 +2016,7 @@ retry_unlocked:
out_unlock:
spin_unlock(&hb->lock);
out:
- if (fshared)
- up_read(fshared);
-
+ futex_unlock_mm(fshared);
return ret;
pi_faulted:
@@ -2064,8 +2038,7 @@ pi_faulted:
goto retry_unlocked;
}
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
@@ -2122,8 +2095,8 @@ static int futex_fd(u32 __user *uaddr, i
if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
- "will be removed from the kernel in June 2007\n",
- current->comm);
+ "will be removed from the kernel in June 2007\n",
+ current->comm);
}
ret = -EINVAL;
@@ -2288,9 +2261,8 @@ retry:
* thread-death.) The rest of the cleanup is done in
* userspace.
*/
- mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
- /* Also keep the FUTEX_WAITER_REQUEUED flag if set */
- mval |= (uval & FUTEX_WAITER_REQUEUED);
+ mval = uval & (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED);
+ mval |= FUTEX_OWNER_DIED;
nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
if (nval == -EFAULT)
@@ -2303,10 +2275,8 @@ retry:
* Wake robust non-PI futexes here. The wakeup of
* PI futexes happens in exit_pi_state():
*/
- if (!pi) {
- if (uval & FUTEX_WAITERS)
+ if (!pi && (uval & FUTEX_WAITERS))
futex_wake(uaddr, &curr->mm->mmap_sem, 1);
- }
}
return 0;
}
--
^ permalink raw reply [flat|nested] 9+ messages in thread